Ruby 2.1.3p242 (2014-09-19 revision 47630)
encoding.c
Go to the documentation of this file.
1 /**********************************************************************
2 
3  encoding.c -
4 
5  $Author: nagachika $
6  created at: Thu May 24 17:23:27 JST 2007
7 
8  Copyright (C) 2007 Yukihiro Matsumoto
9 
10 **********************************************************************/
11 
12 #include "ruby/ruby.h"
13 #include "ruby/encoding.h"
14 #include "internal.h"
15 #include "regenc.h"
16 #include <ctype.h>
17 #include "ruby/util.h"
18 
19 #undef rb_ascii8bit_encindex
20 #undef rb_utf8_encindex
21 #undef rb_usascii_encindex
22 
23 #if defined __GNUC__ && __GNUC__ >= 4
24 #pragma GCC visibility push(default)
25 int rb_enc_register(const char *name, rb_encoding *encoding);
26 void rb_enc_set_base(const char *name, const char *orig);
27 int rb_enc_set_dummy(int index);
28 void rb_encdb_declare(const char *name);
29 int rb_encdb_replicate(const char *name, const char *orig);
30 int rb_encdb_dummy(const char *name);
31 int rb_encdb_alias(const char *alias, const char *orig);
32 void rb_encdb_set_unicode(int index);
33 #pragma GCC visibility pop
34 #endif
35 
36 static ID id_encoding;
39 
41  const char *name;
44 };
45 
46 static struct {
48  int count;
49  int size;
51 } enc_table;
52 
53 void rb_enc_init(void);
54 
55 #define ENCODING_COUNT ENCINDEX_BUILTIN_MAX
56 #define UNSPECIFIED_ENCODING INT_MAX
57 
58 #define ENCODING_NAMELEN_MAX 63
59 #define valid_encoding_name_p(name) ((name) && strlen(name) <= ENCODING_NAMELEN_MAX)
60 
61 #define enc_autoload_p(enc) (!rb_enc_mbmaxlen(enc))
62 
63 static int load_encoding(const char *name);
64 
/*
 * Size callback listed in the "encoding" data type's function table.
 * It ignores its argument and always reports zero bytes.
 */
static size_t
enc_memsize(const void *p)
{
    (void)p;
    return 0;
}
70 
72  "encoding",
73  {0, 0, enc_memsize,},
75 };
76 
77 #define is_data_encoding(obj) (RTYPEDDATA_P(obj) && RTYPEDDATA_TYPE(obj) == &encoding_data_type)
78 #define is_obj_encoding(obj) (RB_TYPE_P((obj), T_DATA) && is_data_encoding(obj))
79 
80 static VALUE
81 enc_new(rb_encoding *encoding)
82 {
83  return TypedData_Wrap_Struct(rb_cEncoding, &encoding_data_type, encoding);
84 }
85 
86 static VALUE
88 {
89  VALUE list, enc;
90 
91  if (!(list = rb_encoding_list)) {
92  rb_bug("rb_enc_from_encoding_index(%d): no rb_encoding_list", idx);
93  }
94  enc = rb_ary_entry(list, idx);
95  if (NIL_P(enc)) {
96  rb_bug("rb_enc_from_encoding_index(%d): not created yet", idx);
97  }
98  return enc;
99 }
100 
101 VALUE
103 {
104  int idx;
105  if (!encoding) return Qnil;
106  idx = ENC_TO_ENCINDEX(encoding);
107  return rb_enc_from_encoding_index(idx);
108 }
109 
110 static int enc_autoload(rb_encoding *);
111 
112 static int
114 {
115  int index = rb_enc_to_index(enc);
116  if (rb_enc_from_index(index) != enc)
117  return -1;
118  if (enc_autoload_p(enc)) {
119  index = enc_autoload(enc);
120  }
121  return index;
122 }
123 
124 static int
126 {
127  if (!is_obj_encoding(obj)) {
128  return -1;
129  }
130  return check_encoding(RDATA(obj)->data);
131 }
132 
133 NORETURN(static void not_encoding(VALUE enc));
134 static void
136 {
137  rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Encoding)",
138  rb_obj_class(enc));
139 }
140 
141 static rb_encoding *
143 {
144  int index = enc_check_encoding(enc);
145  if (index < 0) {
146  not_encoding(enc);
147  }
148  return DATA_PTR(enc);
149 }
150 
151 static rb_encoding *
152 must_encindex(int index)
153 {
155  if (!enc) {
156  rb_raise(rb_eEncodingError, "encoding index out of bound: %d",
157  index);
158  }
159  if (ENC_TO_ENCINDEX(enc) != (int)(index & ENC_INDEX_MASK)) {
160  rb_raise(rb_eEncodingError, "wrong encoding index %d for %s (expected %d)",
161  index, rb_enc_name(enc), ENC_TO_ENCINDEX(enc));
162  }
163  if (enc_autoload_p(enc) && enc_autoload(enc) == -1) {
164  rb_loaderror("failed to load encoding (%s)",
165  rb_enc_name(enc));
166  }
167  return enc;
168 }
169 
170 int
172 {
173  int idx;
174 
175  idx = enc_check_encoding(enc);
176  if (idx >= 0) {
177  return idx;
178  }
179  else if (NIL_P(enc = rb_check_string_type(enc))) {
180  return -1;
181  }
182  if (!rb_enc_asciicompat(rb_enc_get(enc))) {
183  return -1;
184  }
185  return rb_enc_find_index(StringValueCStr(enc));
186 }
187 
188 /* Returns encoding index or UNSPECIFIED_ENCODING */
189 static int
191 {
192  int idx;
193 
194  StringValue(enc);
195  if (!rb_enc_asciicompat(rb_enc_get(enc))) {
196  rb_raise(rb_eArgError, "invalid name encoding (non ASCII)");
197  }
199  return idx;
200 }
201 
202 static int
204 {
205  int idx = str_find_encindex(enc);
206  if (idx < 0) {
207  rb_raise(rb_eArgError, "unknown encoding name - %"PRIsVALUE, enc);
208  }
209  return idx;
210 }
211 
212 static rb_encoding *
214 {
215  return rb_enc_from_index(str_to_encindex(enc));
216 }
217 
218 rb_encoding *
220 {
221  if (enc_check_encoding(enc) >= 0) return RDATA(enc)->data;
222  return str_to_encoding(enc);
223 }
224 
225 rb_encoding *
227 {
228  int idx;
229  if (enc_check_encoding(enc) >= 0) return RDATA(enc)->data;
230  idx = str_find_encindex(enc);
231  if (idx < 0) return NULL;
232  return rb_enc_from_index(idx);
233 }
234 
235 void
237 {
238 }
239 
240 static int
241 enc_table_expand(int newsize)
242 {
243  struct rb_encoding_entry *ent;
244  int count = newsize;
245 
246  if (enc_table.size >= newsize) return newsize;
247  newsize = (newsize + 7) / 8 * 8;
248  ent = realloc(enc_table.list, sizeof(*enc_table.list) * newsize);
249  if (!ent) return -1;
250  memset(ent + enc_table.size, 0, sizeof(*ent)*(newsize - enc_table.size));
251  enc_table.list = ent;
252  enc_table.size = newsize;
253  return count;
254 }
255 
256 static int
257 enc_register_at(int index, const char *name, rb_encoding *encoding)
258 {
259  struct rb_encoding_entry *ent = &enc_table.list[index];
260  VALUE list;
261 
262  if (!valid_encoding_name_p(name)) return -1;
263  if (!ent->name) {
264  ent->name = name = strdup(name);
265  }
266  else if (STRCASECMP(name, ent->name)) {
267  return -1;
268  }
269  if (!ent->enc) {
270  ent->enc = xmalloc(sizeof(rb_encoding));
271  }
272  if (encoding) {
273  *ent->enc = *encoding;
274  }
275  else {
276  memset(ent->enc, 0, sizeof(*ent->enc));
277  }
278  encoding = ent->enc;
279  encoding->name = name;
280  encoding->ruby_encoding_index = index;
281  st_insert(enc_table.names, (st_data_t)name, (st_data_t)index);
282  list = rb_encoding_list;
283  if (list && NIL_P(rb_ary_entry(list, index))) {
284  /* initialize encoding data */
285  rb_ary_store(list, index, enc_new(encoding));
286  }
287  return index;
288 }
289 
290 static int
291 enc_register(const char *name, rb_encoding *encoding)
292 {
293  int index = enc_table.count;
294 
295  if ((index = enc_table_expand(index + 1)) < 0) return -1;
296  enc_table.count = index;
297  return enc_register_at(index - 1, name, encoding);
298 }
299 
300 static void set_encoding_const(const char *, rb_encoding *);
301 int rb_enc_registered(const char *name);
302 
303 int
304 rb_enc_register(const char *name, rb_encoding *encoding)
305 {
306  int index = rb_enc_registered(name);
307 
308  if (index >= 0) {
309  rb_encoding *oldenc = rb_enc_from_index(index);
310  if (STRCASECMP(name, rb_enc_name(oldenc))) {
311  index = enc_register(name, encoding);
312  }
313  else if (enc_autoload_p(oldenc) || !ENC_DUMMY_P(oldenc)) {
314  enc_register_at(index, name, encoding);
315  }
316  else {
317  rb_raise(rb_eArgError, "encoding %s is already registered", name);
318  }
319  }
320  else {
321  index = enc_register(name, encoding);
323  }
324  return index;
325 }
326 
327 void
328 rb_encdb_declare(const char *name)
329 {
330  int idx = rb_enc_registered(name);
331  if (idx < 0) {
332  idx = enc_register(name, 0);
333  }
335 }
336 
337 static void
339 {
340  if (rb_enc_registered(name) >= 0) {
341  rb_raise(rb_eArgError, "encoding %s is already registered", name);
342  }
343 }
344 
345 static rb_encoding*
347 {
348  rb_encoding *enc = enc_table.list[index].enc;
349 
350  enc_table.list[index].base = base;
351  if (rb_enc_dummy_p(base)) ENC_SET_DUMMY(enc);
352  return enc;
353 }
354 
355 /* for encdb.h
356  * Set base encoding for encodings which are not replicas
357  * but not in their own files.
358  */
359 void
360 rb_enc_set_base(const char *name, const char *orig)
361 {
362  int idx = rb_enc_registered(name);
363  int origidx = rb_enc_registered(orig);
364  set_base_encoding(idx, rb_enc_from_index(origidx));
365 }
366 
367 /* for encdb.h
368  * Set encoding dummy.
369  */
370 int
372 {
373  rb_encoding *enc = enc_table.list[index].enc;
374 
375  ENC_SET_DUMMY(enc);
376  return index;
377 }
378 
379 int
380 rb_enc_replicate(const char *name, rb_encoding *encoding)
381 {
382  int idx;
383 
384  enc_check_duplication(name);
385  idx = enc_register(name, encoding);
386  set_base_encoding(idx, encoding);
388  return idx;
389 }
390 
391 /*
392  * call-seq:
393  * enc.replicate(name) -> encoding
394  *
395  * Returns a replicated encoding of _enc_ whose name is _name_.
396  * The new encoding should have the same byte structure as _enc_.
397  * If _name_ is used by another encoding, raise ArgumentError.
398  *
399  */
400 static VALUE
402 {
405  rb_to_encoding(encoding)));
406 }
407 
408 static int
409 enc_replicate_with_index(const char *name, rb_encoding *origenc, int idx)
410 {
411  if (idx < 0) {
412  idx = enc_register(name, origenc);
413  }
414  else {
415  idx = enc_register_at(idx, name, origenc);
416  }
417  if (idx >= 0) {
418  set_base_encoding(idx, origenc);
420  }
421  return idx;
422 }
423 
/*
 * Declares `name` as a replica of `orig`.
 *
 * Both names are looked up first.  If `orig` is not registered yet it is
 * registered with no definition (NULL encoding).  The replica is then
 * created by enc_replicate_with_index(), reusing the slot `name` already
 * occupies if there is one.  Returns that call's result.
 */
int
rb_encdb_replicate(const char *name, const char *orig)
{
    int base_index = rb_enc_registered(orig);
    const int own_index = rb_enc_registered(name);

    if (base_index < 0) {
        base_index = enc_register(orig, 0);
    }
    return enc_replicate_with_index(name, rb_enc_from_index(base_index), own_index);
}
435 
436 int
438 {
439  int index = rb_enc_replicate(name, rb_ascii8bit_encoding());
440  rb_encoding *enc = enc_table.list[index].enc;
441 
442  ENC_SET_DUMMY(enc);
443  return index;
444 }
445 
446 int
447 rb_encdb_dummy(const char *name)
448 {
450  rb_enc_registered(name));
451  rb_encoding *enc = enc_table.list[index].enc;
452 
453  ENC_SET_DUMMY(enc);
454  return index;
455 }
456 
457 /*
458  * call-seq:
459  * enc.dummy? -> true or false
460  *
461  * Returns true for dummy encodings.
462  * A dummy encoding is an encoding for which character handling is not properly
463  * implemented.
464  * It is used for stateful encodings.
465  *
466  * Encoding::ISO_2022_JP.dummy? #=> true
467  * Encoding::UTF_8.dummy? #=> false
468  *
469  */
470 static VALUE
472 {
473  return ENC_DUMMY_P(must_encoding(enc)) ? Qtrue : Qfalse;
474 }
475 
476 /*
477  * call-seq:
478  * enc.ascii_compatible? -> true or false
479  *
480  * Returns whether ASCII-compatible or not.
481  *
482  * Encoding::UTF_8.ascii_compatible? #=> true
483  * Encoding::UTF_16BE.ascii_compatible? #=> false
484  *
485  */
486 static VALUE
488 {
489  return rb_enc_asciicompat(must_encoding(enc)) ? Qtrue : Qfalse;
490 }
491 
492 /*
493  * Returns 1 when the encoding is Unicode series other than UTF-7 else 0.
494  */
495 int
497 {
498  return ONIGENC_IS_UNICODE(enc);
499 }
500 
501 static st_data_t
503 {
504  return (st_data_t)strdup((const char *)name);
505 }
506 
507 /*
508  * Returns copied alias name when the key is added for st_table,
509  * else returns NULL.
510  */
511 static int
512 enc_alias_internal(const char *alias, int idx)
513 {
514  return st_insert2(enc_table.names, (st_data_t)alias, (st_data_t)idx,
515  enc_dup_name);
516 }
517 
518 static int
519 enc_alias(const char *alias, int idx)
520 {
521  if (!valid_encoding_name_p(alias)) return -1;
522  if (!enc_alias_internal(alias, idx))
524  return idx;
525 }
526 
527 int
528 rb_enc_alias(const char *alias, const char *orig)
529 {
530  int idx;
531 
532  enc_check_duplication(alias);
533  if (!enc_table.list) {
534  rb_enc_init();
535  }
536  if ((idx = rb_enc_find_index(orig)) < 0) {
537  return -1;
538  }
539  return enc_alias(alias, idx);
540 }
541 
/*
 * Registers `alias` for `orig`.  If `orig` is not registered yet it is
 * first registered with no definition (NULL encoding).
 * Returns the result of enc_alias().
 */
int
rb_encdb_alias(const char *alias, const char *orig)
{
    const int known = rb_enc_registered(orig);
    const int target = (known >= 0) ? known : enc_register(orig, 0);

    return enc_alias(alias, target);
}
552 
553 void
555 {
557 }
558 
561 
562 void
564 {
566  if (!enc_table.names) {
568  }
569 #define ENC_REGISTER(enc) enc_register_at(ENCINDEX_##enc, rb_enc_name(&OnigEncoding##enc), &OnigEncoding##enc)
572  ENC_REGISTER(US_ASCII);
573 #undef ENC_REGISTER
574 #define ENCDB_REGISTER(name, enc) enc_register_at(ENCINDEX_##enc, name, NULL)
575  ENCDB_REGISTER("UTF-16BE", UTF_16BE);
576  ENCDB_REGISTER("UTF-16LE", UTF_16LE);
577  ENCDB_REGISTER("UTF-32BE", UTF_32BE);
578  ENCDB_REGISTER("UTF-32LE", UTF_32LE);
579  ENCDB_REGISTER("UTF-16", UTF_16);
580  ENCDB_REGISTER("UTF-32", UTF_32);
581  ENCDB_REGISTER("UTF8-MAC", UTF8_MAC);
582 
583  ENCDB_REGISTER("EUC-JP", EUC_JP);
584  ENCDB_REGISTER("Windows-31J", Windows_31J);
585 #undef ENCDB_REGISTER
586  enc_table.count = ENCINDEX_BUILTIN_MAX;
587 }
588 
589 rb_encoding *
591 {
592  if (!enc_table.list) {
593  rb_enc_init();
594  }
595  if (index < 0 || enc_table.count <= (index &= ENC_INDEX_MASK)) {
596  return 0;
597  }
598  return enc_table.list[index].enc;
599 }
600 
601 rb_encoding *
603 {
604  return must_encindex(index);
605 }
606 
607 int
609 {
610  st_data_t idx = 0;
611 
612  if (!name) return -1;
613  if (!enc_table.list) return -1;
614  if (st_lookup(enc_table.names, (st_data_t)name, &idx)) {
615  return (int)idx;
616  }
617  return -1;
618 }
619 
620 static VALUE
622 {
623  int safe = rb_safe_level();
624  return rb_require_safe(enclib, safe > 3 ? 3 : safe);
625 }
626 
627 static int
628 load_encoding(const char *name)
629 {
630  VALUE enclib = rb_sprintf("enc/%s.so", name);
631  VALUE verbose = ruby_verbose;
633  VALUE errinfo;
634  VALUE loaded;
635  char *s = RSTRING_PTR(enclib) + 4, *e = RSTRING_END(enclib) - 3;
636  int idx;
637 
638  while (s < e) {
639  if (!ISALNUM(*s)) *s = '_';
640  else if (ISUPPER(*s)) *s = (char)TOLOWER(*s);
641  ++s;
642  }
643  FL_UNSET(enclib, FL_TAINT);
644  OBJ_FREEZE(enclib);
646  ruby_debug = Qfalse;
647  errinfo = rb_errinfo();
648  loaded = rb_protect(require_enc, enclib, 0);
649  ruby_verbose = verbose;
650  ruby_debug = debug;
651  rb_set_errinfo(errinfo);
652  if (NIL_P(loaded)) return -1;
653  if ((idx = rb_enc_registered(name)) < 0) return -1;
654  if (enc_autoload_p(enc_table.list[idx].enc)) return -1;
655  return idx;
656 }
657 
658 static int
660 {
661  int i;
662  rb_encoding *base = enc_table.list[ENC_TO_ENCINDEX(enc)].base;
663 
664  if (base) {
665  i = 0;
666  do {
667  if (i >= enc_table.count) return -1;
668  } while (enc_table.list[i].enc != base && (++i, 1));
669  if (enc_autoload_p(base)) {
670  if (enc_autoload(base) < 0) return -1;
671  }
672  i = enc->ruby_encoding_index;
673  enc_register_at(i & ENC_INDEX_MASK, rb_enc_name(enc), base);
674  enc->ruby_encoding_index = i;
675  }
676  else {
677  i = load_encoding(rb_enc_name(enc));
678  }
679  return i;
680 }
681 
682 /* Return encoding index or UNSPECIFIED_ENCODING from encoding name */
683 int
685 {
686  int i = rb_enc_registered(name);
687  rb_encoding *enc;
688 
689  if (i < 0) {
690  i = load_encoding(name);
691  }
692  else if (!(enc = rb_enc_from_index(i))) {
693  if (i != UNSPECIFIED_ENCODING) {
694  rb_raise(rb_eArgError, "encoding %s is not registered", name);
695  }
696  }
697  else if (enc_autoload_p(enc)) {
698  if (enc_autoload(enc) < 0) {
699  rb_warn("failed to load encoding (%s); use ASCII-8BIT instead",
700  name);
701  return 0;
702  }
703  }
704  return i;
705 }
706 
707 rb_encoding *
708 rb_enc_find(const char *name)
709 {
710  int idx = rb_enc_find_index(name);
711  if (idx < 0) idx = 0;
712  return rb_enc_from_index(idx);
713 }
714 
715 static inline int
717 {
718  if (SPECIAL_CONST_P(obj)) return SYMBOL_P(obj);
719  switch (BUILTIN_TYPE(obj)) {
720  case T_STRING:
721  case T_REGEXP:
722  case T_FILE:
723  return TRUE;
724  case T_DATA:
725  if (is_data_encoding(obj)) return TRUE;
726  default:
727  return FALSE;
728  }
729 }
730 
731 ID
733 {
734  CONST_ID(id_encoding, "encoding");
735  return id_encoding;
736 }
737 
738 int
740 {
741  int i = -1;
742  VALUE tmp;
743 
744  if (SPECIAL_CONST_P(obj)) {
745  if (!SYMBOL_P(obj)) return -1;
746  obj = rb_id2str(SYM2ID(obj));
747  }
748  switch (BUILTIN_TYPE(obj)) {
749  as_default:
750  default:
751  case T_STRING:
752  case T_REGEXP:
753  i = ENCODING_GET_INLINED(obj);
754  if (i == ENCODING_INLINE_MAX) {
755  VALUE iv;
756 
757  iv = rb_ivar_get(obj, rb_id_encoding());
758  i = NUM2INT(iv);
759  }
760  break;
761  case T_FILE:
762  tmp = rb_funcall(obj, rb_intern("internal_encoding"), 0, 0);
763  if (NIL_P(tmp)) obj = rb_funcall(obj, rb_intern("external_encoding"), 0, 0);
764  else obj = tmp;
765  if (NIL_P(obj)) break;
766  case T_DATA:
767  if (is_data_encoding(obj)) {
768  i = enc_check_encoding(obj);
769  }
770  else {
771  goto as_default;
772  }
773  break;
774  }
775  return i;
776 }
777 
778 static void
779 enc_set_index(VALUE obj, int idx)
780 {
781  if (idx < ENCODING_INLINE_MAX) {
782  ENCODING_SET_INLINED(obj, idx);
783  return;
784  }
786  rb_ivar_set(obj, rb_id_encoding(), INT2NUM(idx));
787 }
788 
789 void
790 rb_enc_set_index(VALUE obj, int idx)
791 {
792  rb_check_frozen(obj);
793  must_encindex(idx);
794  enc_set_index(obj, idx);
795 }
796 
797 VALUE
799 {
800  rb_encoding *enc;
801  int oldidx, oldtermlen, termlen;
802 
803 /* enc_check_capable(obj);*/
804  rb_check_frozen(obj);
805  oldidx = rb_enc_get_index(obj);
806  if (oldidx == idx)
807  return obj;
808  if (SPECIAL_CONST_P(obj)) {
809  rb_raise(rb_eArgError, "cannot set encoding");
810  }
811  enc = must_encindex(idx);
812  if (!ENC_CODERANGE_ASCIIONLY(obj) ||
813  !rb_enc_asciicompat(enc)) {
814  ENC_CODERANGE_CLEAR(obj);
815  }
816  termlen = rb_enc_mbminlen(enc);
817  oldtermlen = rb_enc_mbminlen(rb_enc_from_index(oldidx));
818  if (oldtermlen < termlen && RB_TYPE_P(obj, T_STRING)) {
819  rb_str_fill_terminator(obj, termlen);
820  }
821  enc_set_index(obj, idx);
822  return obj;
823 }
824 
825 VALUE
827 {
828  return rb_enc_associate_index(obj, rb_enc_to_index(enc));
829 }
830 
833 {
834  return rb_enc_from_index(rb_enc_get_index(obj));
835 }
836 
839 {
840  rb_encoding *enc = rb_enc_compatible(str1, str2);
841  if (!enc)
842  rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
843  rb_enc_name(rb_enc_get(str1)),
844  rb_enc_name(rb_enc_get(str2)));
845  return enc;
846 }
847 
850 {
851  int idx1, idx2;
852  rb_encoding *enc1, *enc2;
853  int isstr1, isstr2;
854 
855  idx1 = rb_enc_get_index(str1);
856  idx2 = rb_enc_get_index(str2);
857 
858  if (idx1 < 0 || idx2 < 0)
859  return 0;
860 
861  if (idx1 == idx2) {
862  return rb_enc_from_index(idx1);
863  }
864  enc1 = rb_enc_from_index(idx1);
865  enc2 = rb_enc_from_index(idx2);
866 
867  isstr2 = RB_TYPE_P(str2, T_STRING);
868  if (isstr2 && RSTRING_LEN(str2) == 0)
869  return enc1;
870  isstr1 = RB_TYPE_P(str1, T_STRING);
871  if (isstr1 && RSTRING_LEN(str1) == 0)
872  return (rb_enc_asciicompat(enc1) && rb_enc_str_asciionly_p(str2)) ? enc1 : enc2;
873  if (!rb_enc_asciicompat(enc1) || !rb_enc_asciicompat(enc2)) {
874  return 0;
875  }
876 
877  /* objects whose encoding is the same of contents */
878  if (!isstr2 && idx2 == ENCINDEX_US_ASCII)
879  return enc1;
880  if (!isstr1 && idx1 == ENCINDEX_US_ASCII)
881  return enc2;
882 
883  if (!isstr1) {
884  VALUE tmp = str1;
885  int idx0 = idx1;
886  str1 = str2;
887  str2 = tmp;
888  idx1 = idx2;
889  idx2 = idx0;
890  idx0 = isstr1;
891  isstr1 = isstr2;
892  isstr2 = idx0;
893  }
894  if (isstr1) {
895  int cr1, cr2;
896 
897  cr1 = rb_enc_str_coderange(str1);
898  if (isstr2) {
899  cr2 = rb_enc_str_coderange(str2);
900  if (cr1 != cr2) {
901  /* may need to handle ENC_CODERANGE_BROKEN */
902  if (cr1 == ENC_CODERANGE_7BIT) return enc2;
903  if (cr2 == ENC_CODERANGE_7BIT) return enc1;
904  }
905  if (cr2 == ENC_CODERANGE_7BIT) {
906  return enc1;
907  }
908  }
909  if (cr1 == ENC_CODERANGE_7BIT)
910  return enc2;
911  }
912  return 0;
913 }
914 
915 void
917 {
919 }
920 
921 
922 /*
923  * call-seq:
924  * obj.encoding -> encoding
925  *
926  * Returns the Encoding object that represents the encoding of obj.
927  */
928 
929 VALUE
931 {
932  int idx = rb_enc_get_index(obj);
933  if (idx < 0) {
934  rb_raise(rb_eTypeError, "unknown encoding");
935  }
937 }
938 
939 int
940 rb_enc_fast_mbclen(const char *p, const char *e, rb_encoding *enc)
941 {
942  return ONIGENC_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
943 }
944 
945 int
946 rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
947 {
948  int n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
949  if (MBCLEN_CHARFOUND_P(n) && MBCLEN_CHARFOUND_LEN(n) <= e-p)
950  return MBCLEN_CHARFOUND_LEN(n);
951  else {
952  int min = rb_enc_mbminlen(enc);
953  return min <= e-p ? min : (int)(e-p);
954  }
955 }
956 
957 int
958 rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
959 {
960  int n;
961  if (e <= p)
963  n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
964  if (e-p < n)
965  return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n-(int)(e-p));
966  return n;
967 }
968 
969 int
970 rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc)
971 {
972  unsigned int c, l;
973  if (e <= p)
974  return -1;
975  if (rb_enc_asciicompat(enc)) {
976  c = (unsigned char)*p;
977  if (!ISASCII(c))
978  return -1;
979  if (len) *len = 1;
980  return c;
981  }
982  l = rb_enc_precise_mbclen(p, e, enc);
983  if (!MBCLEN_CHARFOUND_P(l))
984  return -1;
985  c = rb_enc_mbc_to_codepoint(p, e, enc);
986  if (!rb_enc_isascii(c, enc))
987  return -1;
988  if (len) *len = l;
989  return c;
990 }
991 
992 unsigned int
993 rb_enc_codepoint_len(const char *p, const char *e, int *len_p, rb_encoding *enc)
994 {
995  int r;
996  if (e <= p)
997  rb_raise(rb_eArgError, "empty string");
998  r = rb_enc_precise_mbclen(p, e, enc);
999  if (!MBCLEN_CHARFOUND_P(r)) {
1000  rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(enc));
1001  }
1002  if (len_p) *len_p = MBCLEN_CHARFOUND_LEN(r);
1003  return rb_enc_mbc_to_codepoint(p, e, enc);
1004 }
1005 
1006 #undef rb_enc_codepoint
1007 unsigned int
1008 rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
1009 {
1010  return rb_enc_codepoint_len(p, e, 0, enc);
1011 }
1012 
1013 int
1015 {
1016  int n = ONIGENC_CODE_TO_MBCLEN(enc,c);
1017  if (n == 0) {
1018  rb_raise(rb_eArgError, "invalid codepoint 0x%x in %s", c, rb_enc_name(enc));
1019  }
1020  return n;
1021 }
1022 
1023 #undef rb_enc_code_to_mbclen
1024 int
1026 {
1027  return ONIGENC_CODE_TO_MBCLEN(enc, code);
1028 }
1029 
1030 int
1032 {
1034 }
1035 
1036 int
1038 {
1040 }
1041 
1042 /*
1043  * call-seq:
1044  * enc.inspect -> string
1045  *
1046  * Returns a string which represents the encoding for programmers.
1047  *
1048  * Encoding::UTF_8.inspect #=> "#<Encoding:UTF-8>"
1049  * Encoding::ISO_2022_JP.inspect #=> "#<Encoding:ISO-2022-JP (dummy)>"
1050  */
1051 static VALUE
1053 {
1054  rb_encoding *enc;
1055 
1056  if (!is_data_encoding(self)) {
1057  not_encoding(self);
1058  }
1059  if (!(enc = DATA_PTR(self)) || rb_enc_from_index(rb_enc_to_index(enc)) != enc) {
1060  rb_raise(rb_eTypeError, "broken Encoding");
1061  }
1063  "#<%"PRIsVALUE":%s%s%s>", rb_obj_class(self),
1064  rb_enc_name(enc),
1065  (ENC_DUMMY_P(enc) ? " (dummy)" : ""),
1066  enc_autoload_p(enc) ? " (autoload)" : "");
1067 }
1068 
1069 /*
1070  * call-seq:
1071  * enc.name -> string
1072  * enc.to_s -> string
1073  *
1074  * Returns the name of the encoding.
1075  *
1076  * Encoding::UTF_8.name #=> "UTF-8"
1077  */
1078 static VALUE
1080 {
1082 }
1083 
1084 static int
1086 {
1087  VALUE *arg = (VALUE *)args;
1088 
1089  if ((int)idx == (int)arg[0]) {
1090  VALUE str = rb_usascii_str_new2((char *)name);
1091  OBJ_FREEZE(str);
1092  rb_ary_push(arg[1], str);
1093  }
1094  return ST_CONTINUE;
1095 }
1096 
1097 /*
1098  * call-seq:
1099  * enc.names -> array
1100  *
1101  * Returns the list of name and aliases of the encoding.
1102  *
1103  * Encoding::WINDOWS_31J.names #=> ["Windows-31J", "CP932", "csWindows31J"]
1104  */
1105 static VALUE
1107 {
1108  VALUE args[2];
1109 
1110  args[0] = (VALUE)rb_to_encoding_index(self);
1111  args[1] = rb_ary_new2(0);
1112  st_foreach(enc_table.names, enc_names_i, (st_data_t)args);
1113  return args[1];
1114 }
1115 
1116 /*
1117  * call-seq:
1118  * Encoding.list -> [enc1, enc2, ...]
1119  *
1120  * Returns the list of loaded encodings.
1121  *
1122  * Encoding.list
1123  * #=> [#<Encoding:ASCII-8BIT>, #<Encoding:UTF-8>,
1124  * #<Encoding:ISO-2022-JP (dummy)>]
1125  *
1126  * Encoding.find("US-ASCII")
1127  * #=> #<Encoding:US-ASCII>
1128  *
1129  * Encoding.list
1130  * #=> [#<Encoding:ASCII-8BIT>, #<Encoding:UTF-8>,
1131  * #<Encoding:US-ASCII>, #<Encoding:ISO-2022-JP (dummy)>]
1132  *
1133  */
1134 static VALUE
1136 {
1137  VALUE ary = rb_ary_new2(0);
1139  return ary;
1140 }
1141 
1142 /*
1143  * call-seq:
1144  * Encoding.find(string) -> enc
1145  *
1146  * Search the encoding with specified <i>name</i>.
1147  * <i>name</i> should be a string.
1148  *
1149  * Encoding.find("US-ASCII") #=> #<Encoding:US-ASCII>
1150  *
1151  * Names which this method accepts are encoding names and aliases,
1152  * including the following special aliases
1153  *
1154  * "external":: default external encoding
1155  * "internal":: default internal encoding
1156  * "locale":: locale encoding
1157  * "filesystem":: filesystem encoding
1158  *
1159  * An ArgumentError is raised when there is no encoding named <i>name</i>.
1160  * Only <code>Encoding.find("internal")</code> however returns nil
1161  * when no encoding named "internal", in other words, when Ruby has no
1162  * default internal encoding.
1163  */
1164 static VALUE
1166 {
1167  int idx;
1168  if (is_obj_encoding(enc))
1169  return enc;
1170  idx = str_to_encindex(enc);
1171  if (idx == UNSPECIFIED_ENCODING) return Qnil;
1172  return rb_enc_from_encoding_index(idx);
1173 }
1174 
1175 /*
1176  * call-seq:
1177  * Encoding.compatible?(obj1, obj2) -> enc or nil
1178  *
1179  * Checks the compatibility of two objects.
1180  *
1181  * If the objects are both strings they are compatible when they are
1182  * concatenatable. The encoding of the concatenated string will be returned
1183  * if they are compatible, nil if they are not.
1184  *
1185  * Encoding.compatible?("\xa1".force_encoding("iso-8859-1"), "b")
1186  * #=> #<Encoding:ISO-8859-1>
1187  *
1188  * Encoding.compatible?(
1189  * "\xa1".force_encoding("iso-8859-1"),
1190  * "\xa1\xa1".force_encoding("euc-jp"))
1191  * #=> nil
1192  *
1193  * If the objects are non-strings their encodings are compatible when they
1194  * have an encoding and:
1195  * * Either encoding is US-ASCII compatible
1196  * * One of the encodings is a 7-bit encoding
1197  *
1198  */
1199 static VALUE
1200 enc_compatible_p(VALUE klass, VALUE str1, VALUE str2)
1201 {
1202  rb_encoding *enc;
1203 
1204  if (!enc_capable(str1)) return Qnil;
1205  if (!enc_capable(str2)) return Qnil;
1206  enc = rb_enc_compatible(str1, str2);
1207  if (!enc) return Qnil;
1208  return rb_enc_from_encoding(enc);
1209 }
1210 
1211 /* :nodoc: */
1212 static VALUE
1214 {
1215  rb_scan_args(argc, argv, "01", 0);
1216  return enc_name(self);
1217 }
1218 
1219 /* :nodoc: */
1220 static VALUE
1221 enc_load(VALUE klass, VALUE str)
1222 {
1223  return enc_find(klass, str);
1224 }
1225 
1226 rb_encoding *
1228 {
1229  if (!enc_table.list) {
1230  rb_enc_init();
1231  }
1232  return enc_table.list[ENCINDEX_ASCII].enc;
1233 }
1234 
1235 int
1237 {
1238  return ENCINDEX_ASCII;
1239 }
1240 
1241 rb_encoding *
1243 {
1244  if (!enc_table.list) {
1245  rb_enc_init();
1246  }
1247  return enc_table.list[ENCINDEX_UTF_8].enc;
1248 }
1249 
1250 int
1252 {
1253  return ENCINDEX_UTF_8;
1254 }
1255 
1256 rb_encoding *
1258 {
1259  if (!enc_table.list) {
1260  rb_enc_init();
1261  }
1262  return enc_table.list[ENCINDEX_US_ASCII].enc;
1263 }
1264 
1265 int
1267 {
1268  return ENCINDEX_US_ASCII;
1269 }
1270 
1271 int
1273 {
1275  int idx;
1276 
1277  if (NIL_P(charmap))
1278  idx = ENCINDEX_US_ASCII;
1279  else if ((idx = rb_enc_find_index(StringValueCStr(charmap))) < 0)
1280  idx = ENCINDEX_ASCII;
1281 
1282  if (rb_enc_registered("locale") < 0) {
1283 # if defined _WIN32
1284  void Init_w32_codepage(void);
1286 # endif
1287  enc_alias_internal("locale", idx);
1288  }
1289 
1290  return idx;
1291 }
1292 
1293 rb_encoding *
1295 {
1297 }
1298 
1299 int
1301 {
1302  int idx = rb_enc_registered("filesystem");
1303  if (idx < 0)
1304  idx = ENCINDEX_ASCII;
1305  return idx;
1306 }
1307 
1308 rb_encoding *
1310 {
1312 }
1313 
1315  int index; /* -2 => not yet set, -1 => nil */
1317 };
1318 
1320 
1321 extern int Init_enc_set_filesystem_encoding(void);
1322 
1323 static int
1324 enc_set_default_encoding(struct default_encoding *def, VALUE encoding, const char *name)
1325 {
1326  int overridden = FALSE;
1327 
1328  if (def->index != -2)
1329  /* Already set */
1330  overridden = TRUE;
1331 
1332  if (NIL_P(encoding)) {
1333  def->index = -1;
1334  def->enc = 0;
1335  st_insert(enc_table.names, (st_data_t)strdup(name),
1337  }
1338  else {
1339  def->index = rb_enc_to_index(rb_to_encoding(encoding));
1340  def->enc = 0;
1341  enc_alias_internal(name, def->index);
1342  }
1343 
1344  if (def == &default_external)
1346 
1347  return overridden;
1348 }
1349 
1350 rb_encoding *
1352 {
1353  if (default_external.enc) return default_external.enc;
1354 
1355  if (default_external.index >= 0) {
1356  default_external.enc = rb_enc_from_index(default_external.index);
1357  return default_external.enc;
1358  }
1359  else {
1360  return rb_locale_encoding();
1361  }
1362 }
1363 
1364 VALUE
1366 {
1368 }
1369 
1370 /*
1371  * call-seq:
1372  * Encoding.default_external -> enc
1373  *
1374  * Returns default external encoding.
1375  *
1376  * The default external encoding is used by default for strings created from
1377  * the following locations:
1378  *
1379  * * CSV
1380  * * File data read from disk
1381  * * SDBM
1382  * * StringIO
1383  * * Zlib::GzipReader
1384  * * Zlib::GzipWriter
1385  * * String#inspect
1386  * * Regexp#inspect
1387  *
1388  * While strings created from these locations will have this encoding, the
1389  * encoding may not be valid. Be sure to check String#valid_encoding?.
1390  *
1391  * File data written to disk will be transcoded to the default external
1392  * encoding when written.
1393  *
1394  * The default external encoding is initialized by the locale or -E option.
1395  */
1396 static VALUE
1398 {
1399  return rb_enc_default_external();
1400 }
1401 
1402 void
1404 {
1405  if (NIL_P(encoding)) {
1406  rb_raise(rb_eArgError, "default external can not be nil");
1407  }
1408  enc_set_default_encoding(&default_external, encoding,
1409  "external");
1410 }
1411 
1412 /*
1413  * call-seq:
1414  * Encoding.default_external = enc
1415  *
1416  * Sets default external encoding. You should not set
1417  * Encoding::default_external in ruby code as strings created before changing
1418  * the value may have a different encoding from strings created after the value
1419  * was changed. Instead you should use <tt>ruby -E</tt> to invoke ruby with
1420  * the correct default_external.
1421  *
1422  * See Encoding::default_external for information on how the default external
1423  * encoding is used.
1424  */
1425 static VALUE
1427 {
1428  rb_warning("setting Encoding.default_external");
1429  rb_enc_set_default_external(encoding);
1430  return encoding;
1431 }
1432 
1433 static struct default_encoding default_internal = {-2};
1434 
/*
 * rb_default_internal_encoding: returns default_internal.enc, resolving it
 * from default_internal.index on demand. May return NULL.
 * NOTE(review): listing line 1436 (the declarator) is absent from this
 * extract; the cross-reference index in this file records it as
 * `rb_encoding * rb_default_internal_encoding(void)`.
 */
1435 rb_encoding *
1437 {
 /* Resolve the pointer only when it is unset and the index is non-negative. */
1438  if (!default_internal.enc && default_internal.index >= 0) {
1439  default_internal.enc = rb_enc_from_index(default_internal.index);
1440  }
1441  return default_internal.enc; /* can be NULL */
1442 }
1443 
/*
 * rb_enc_default_internal: VALUE-returning accessor for the default
 * internal encoding.
 * NOTE(review): listing lines 1445 (the declarator) and 1448 (the only
 * statement of the body) are absent from this extract, so the visible
 * block has no return statement. The cross-reference index in this file
 * records the declarator as `VALUE rb_enc_default_internal(void)`.
 */
1444 VALUE
1446 {
1447  /* Note: These functions cope with default_internal not being set */
1449 }
1450 
1451 /*
1452  * call-seq:
1453  * Encoding.default_internal -> enc
1454  *
1455  * Returns default internal encoding. Strings will be transcoded to the
1456  * default internal encoding in the following places if the default internal
1457  * encoding is not nil:
1458  *
1459  * * CSV
1460  * * Etc.sysconfdir and Etc.systmpdir
1461  * * File data read from disk
1462  * * File names from Dir
1463  * * Integer#chr
1464  * * String#inspect and Regexp#inspect
1465  * * Strings returned from Readline
1466  * * Strings returned from SDBM
1467  * * Time#zone
1468  * * Values from ENV
1469  * * Values in ARGV including $PROGRAM_NAME
1470  * * __FILE__
1471  *
1472  * Additionally String#encode and String#encode! use the default internal
1473  * encoding if no encoding is given.
1474  *
1475  * The script encoding (__ENCODING__), not default_internal, is used as the
1476  * encoding of created strings.
1477  *
1478  * Encoding::default_internal is initialized by the source file's
1479  * internal_encoding or -E option.
1480  */
/*
 * get_default_internal: returns the value of rb_enc_default_internal().
 * The body references no parameter.
 * NOTE(review): listing line 1482 (the declarator) is absent from this
 * extract; the cross-reference index in this file records it as
 * `static VALUE get_default_internal(VALUE klass)`.
 */
1481 static VALUE
1483 {
1484  return rb_enc_default_internal();
1485 }
1486 
/*
 * rb_enc_set_default_internal: hands `encoding` to
 * enc_set_default_encoding() together with &default_internal and the label
 * "internal". Unlike the external setter, there is no nil check here.
 * NOTE(review): listing line 1488 (the declarator) is absent from this
 * extract; the cross-reference index in this file records it as
 * `void rb_enc_set_default_internal(VALUE encoding)`.
 */
1487 void
1489 {
1490  enc_set_default_encoding(&default_internal, encoding,
1491  "internal");
1492 }
1493 
1494 /*
1495  * call-seq:
1496  * Encoding.default_internal = enc or nil
1497  *
1498  * Sets default internal encoding or removes default internal encoding when
1499  * passed nil. You should not set Encoding::default_internal in ruby code as
1500  * strings created before changing the value may have a different encoding
1501  * from strings created after the change. Instead you should use
1502  * <tt>ruby -E</tt> to invoke ruby with the correct default_internal.
1503  *
1504  * See Encoding::default_internal for information on how the default internal
1505  * encoding is used.
1506  */
/*
 * set_default_internal: emits an rb_warning(), forwards `encoding` to
 * rb_enc_set_default_internal(), and returns `encoding` unchanged.
 * NOTE(review): listing line 1508 (the declarator) is absent from this
 * extract; the cross-reference index in this file records it as
 * `static VALUE set_default_internal(VALUE klass, VALUE encoding)`.
 */
1507 static VALUE
1509 {
1510  rb_warning("setting Encoding.default_internal");
1511  rb_enc_set_default_internal(encoding);
1512  return encoding;
1513 }
1514 
1515 /*
1516  * call-seq:
1517  * Encoding.locale_charmap -> string
1518  *
1519  * Returns the locale charmap name.
1520  * It returns nil if no appropriate information is available.
1521  *
1522  * Debian GNU/Linux
1523  * LANG=C
1524  * Encoding.locale_charmap #=> "ANSI_X3.4-1968"
1525  * LANG=ja_JP.EUC-JP
1526  * Encoding.locale_charmap #=> "EUC-JP"
1527  *
1528  * SunOS 5
1529  * LANG=C
1530  * Encoding.locale_charmap #=> "646"
1531  * LANG=ja
1532  * Encoding.locale_charmap #=> "eucJP"
1533  *
1534  * The result is highly platform dependent.
1535  * So Encoding.find(Encoding.locale_charmap) may cause an error.
1536  * If you need some encoding object even for unknown locale,
1537  * Encoding.find("locale") can be used.
1538  *
1539  */
/*
 * Forward declaration only: no body is given here. The cross-reference
 * index in this file places the definition in localeinit.c.
 */
1540 VALUE
1541 rb_locale_charmap(VALUE klass);
1542 
/*
 * set_encoding_const: defines up to three constants under rb_cEncoding
 * that all refer to the Encoding object for `enc`:
 *   - `name` verbatim, when it starts with an uppercase letter and consists
 *     only of alphanumerics and '_';
 *   - a sanitised copy (first letter upcased, every non-alphanumeric
 *     replaced by '_'), when the verbatim form was not usable and the name
 *     contains an uppercase letter;
 *   - an all-uppercase copy, when the name contains a lowercase letter.
 * Names starting with a digit or longer than ENCODING_NAMELEN_MAX define
 * nothing.
 * NOTE(review): listing line 1544 (the declarator) is absent from this
 * extract; the cross-reference index in this file records it as
 * `static void set_encoding_const(const char *, rb_encoding *)`. The body
 * uses the parameter names `name` and `enc`.
 */
1543 static void
1545 {
1546  VALUE encoding = rb_enc_from_encoding(enc);
1547  char *s = (char *)name;
1548  int haslower = 0, hasupper = 0, valid = 0;
1549 
 /* A leading digit defines nothing at all. */
1550  if (ISDIGIT(*s)) return;
 /* Leading uppercase: scan the identifier-like prefix, noting lowercase. */
1551  if (ISUPPER(*s)) {
1552  hasupper = 1;
1553  while (*++s && (ISALNUM(*s) || *s == '_')) {
1554  if (ISLOWER(*s)) haslower = 1;
1555  }
1556  }
 /* s is at the terminator: the name is usable verbatim. */
1557  if (!*s) {
1558  if (s - name > ENCODING_NAMELEN_MAX) return;
1559  valid = 1;
1560  rb_define_const(rb_cEncoding, name, encoding);
1561  }
 /* Derived spellings need a writable copy of the name. */
1562  if (!valid || haslower) {
1563  size_t len = s - name;
1564  if (len > ENCODING_NAMELEN_MAX) return;
 /* Keep scanning from s until both letter cases are seen or the string ends. */
1565  if (!haslower || !hasupper) {
1566  do {
1567  if (ISLOWER(*s)) haslower = 1;
1568  if (ISUPPER(*s)) hasupper = 1;
1569  } while (*++s && (!haslower || !hasupper));
1570  len = s - name;
1571  }
 /* Add the unscanned tail; the post-increment then counts the terminator. */
1572  len += strlen(s);
1573  if (len++ > ENCODING_NAMELEN_MAX) return;
 /* Stack copy (ALLOCA_N) including the terminator; name and s now point at it. */
1574  MEMCPY(s = ALLOCA_N(char, len), name, char, len);
1575  name = s;
1576  if (!valid) {
 /* Sanitise in place: upcase the first letter, map non-alphanumerics to '_'. */
1577  if (ISLOWER(*s)) *s = ONIGENC_ASCII_CODE_TO_UPPER_CASE((int)*s);
1578  for (; *s; ++s) {
1579  if (!ISALNUM(*s)) *s = '_';
1580  }
 /* The mixed-case sanitised form is defined only if an uppercase letter was seen. */
1581  if (hasupper) {
1582  rb_define_const(rb_cEncoding, name, encoding);
1583  }
1584  }
1585  if (haslower) {
 /* Upcase every lowercase letter of the copy and define that spelling too. */
1586  for (s = (char *)name; *s; ++s) {
1587  if (ISLOWER(*s)) *s = ONIGENC_ASCII_CODE_TO_UPPER_CASE((int)*s);
1588  }
1589  rb_define_const(rb_cEncoding, name, encoding);
1590  }
1591  }
1592 }
1593 
/*
 * rb_enc_name_list_i: iteration callback that appends a frozen US-ASCII
 * string built from `name` to the array passed in `arg`, then returns
 * ST_CONTINUE.
 * NOTE(review): listing line 1595 (the declarator) is absent from this
 * extract; the cross-reference index in this file records it as
 * `static int rb_enc_name_list_i(st_data_t name, st_data_t idx, st_data_t arg)`.
 */
1594 static int
1596 {
1597  VALUE ary = (VALUE)arg;
1598  VALUE str = rb_usascii_str_new2((char *)name);
1599  OBJ_FREEZE(str);
1600  rb_ary_push(ary, str);
1601  return ST_CONTINUE;
1602 }
1603 
1604 /*
1605  * call-seq:
1606  * Encoding.name_list -> ["enc1", "enc2", ...]
1607  *
1608  * Returns the list of available encoding names.
1609  *
1610  * Encoding.name_list
1611  * #=> ["US-ASCII", "ASCII-8BIT", "UTF-8",
1612  * "ISO-8859-1", "Shift_JIS", "EUC-JP",
1613  * "Windows-31J",
1614  * "BINARY", "CP932", "eucJP"]
1615  *
1616  */
1617 
/*
 * rb_enc_name_list: allocates an array sized to the number of entries in
 * enc_table.names and returns it.
 * NOTE(review): listing lines 1619 (the declarator) and 1622 are absent
 * from this extract. The missing body line presumably fills the array by
 * iterating enc_table.names with rb_enc_name_list_i -- confirm against the
 * real source. The cross-reference index in this file records the
 * declarator as `static VALUE rb_enc_name_list(VALUE klass)`.
 */
1618 static VALUE
1620 {
1621  VALUE ary = rb_ary_new2(enc_table.names->num_entries);
1623  return ary;
1624 }
1625 
/*
 * rb_enc_aliases_enc_i: iteration callback that records `name` as an alias
 * in a hash. `arg` points at two VALUEs: [0] is the hash being filled and
 * [1] is an array that caches, per index `orig`, the frozen string holding
 * the encoding's own name. Always returns ST_CONTINUE.
 * NOTE(review): listing lines 1627 (the declarator) and 1635 (which must
 * declare `enc`, used below) are absent from this extract. The
 * cross-reference index in this file records the declarator as
 * `static int rb_enc_aliases_enc_i(st_data_t name, st_data_t orig, st_data_t arg)`.
 */
1626 static int
1628 {
1629  VALUE *p = (VALUE *)arg;
1630  VALUE aliases = p[0], ary = p[1];
1631  int idx = (int)orig;
1632  VALUE key, str = rb_ary_entry(ary, idx);
1633 
 /* Cache miss: build and store the name string for this index. */
1634  if (NIL_P(str)) {
1636 
1637  if (!enc) return ST_CONTINUE;
 /* `name` equals the encoding's own name (ignoring case): not an alias, skip. */
1638  if (STRCASECMP((char*)name, rb_enc_name(enc)) == 0) {
1639  return ST_CONTINUE;
1640  }
1641  str = rb_usascii_str_new2(rb_enc_name(enc));
1642  OBJ_FREEZE(str);
1643  rb_ary_store(ary, idx, str);
1644  }
 /* NOTE(review): when the cache entry already exists the STRCASECMP filter above is not applied. */
1645  key = rb_usascii_str_new2((char *)name);
1646  OBJ_FREEZE(key);
1647  rb_hash_aset(aliases, key, str);
1648  return ST_CONTINUE;
1649 }
1650 
1651 /*
1652  * call-seq:
1653  * Encoding.aliases -> {"alias1" => "orig1", "alias2" => "orig2", ...}
1654  *
1655  * Returns a hash mapping each available encoding alias to its original encoding name.
1656  *
1657  * Encoding.aliases
1658  * #=> {"BINARY"=>"ASCII-8BIT", "ASCII"=>"US-ASCII", "ANSI_X3.4-1968"=>"US-ASCII",
1659  * "SJIS"=>"Shift_JIS", "eucJP"=>"EUC-JP", "CP932"=>"Windows-31J"}
1660  *
1661  */
1662 
/*
 * rb_enc_aliases: creates a fresh hash (aliases[0]) and a fresh array
 * (aliases[1]) and returns the hash.
 * NOTE(review): listing lines 1664 (the declarator) and 1669 are absent
 * from this extract. The missing body line presumably iterates
 * enc_table.names with rb_enc_aliases_enc_i, passing this pair -- confirm
 * against the real source. The cross-reference index in this file records
 * the declarator as `static VALUE rb_enc_aliases(VALUE klass)`.
 */
1663 static VALUE
1665 {
1666  VALUE aliases[2];
1667  aliases[0] = rb_hash_new();
1668  aliases[1] = rb_ary_new();
1670  return aliases[0];
1671 }
1672 
1673 /*
1674  * An Encoding instance represents a character encoding usable in Ruby. It is
1675  * defined as a constant under the Encoding namespace. It has a name and
1676  * optionally, aliases:
1677  *
1678  * Encoding::ISO_8859_1.name
1679  * #=> "ISO-8859-1"
1680  *
1681  * Encoding::ISO_8859_1.names
1682  * #=> ["ISO-8859-1", "ISO8859-1"]
1683  *
1684  * Ruby methods dealing with encodings return or accept Encoding instances as
1685  * arguments (when a method accepts an Encoding instance as an argument, it
1686  * can be passed an Encoding name or alias instead).
1687  *
1688  * "some string".encoding
1689  * #=> #<Encoding:UTF-8>
1690  *
1691  * string = "some string".encode(Encoding::ISO_8859_1)
1692  * #=> "some string"
1693  * string.encoding
1694  * #=> #<Encoding:ISO-8859-1>
1695  *
1696  * "some string".encode "ISO-8859-1"
1697  * #=> "some string"
1698  *
1699  * <code>Encoding::ASCII_8BIT</code> is a special encoding that is usually
1700  * used for a byte string, not a character string. But as the name suggests,
1701  * its characters in the range of ASCII are considered as ASCII characters.
1702  * This is useful when you use ASCII-8BIT characters with other ASCII
1703  * compatible characters.
1704  *
1705  * == Changing an encoding
1706  *
1707  * The associated Encoding of a String can be changed in two different ways.
1708  *
1709  * First, it is possible to set the Encoding of a string to a new Encoding
1710  * without changing the internal byte representation of the string, with
1711  * String#force_encoding. This is how you can tell Ruby the correct encoding
1712  * of a string.
1713  *
1714  * string
1715  * #=> "R\xC3\xA9sum\xC3\xA9"
1716  * string.encoding
1717  * #=> #<Encoding:ISO-8859-1>
1718  * string.force_encoding(Encoding::UTF_8)
1719  * #=> "R\u00E9sum\u00E9"
1720  *
1721  * Second, it is possible to transcode a string, i.e. translate its internal
1722  * byte representation to another encoding. Its associated encoding is also
1723  * set to the other encoding. See String#encode for the various forms of
1724  * transcoding, and the Encoding::Converter class for additional control over
1725  * the transcoding process.
1726  *
1727  * string
1728  * #=> "R\u00E9sum\u00E9"
1729  * string.encoding
1730  * #=> #<Encoding:UTF-8>
1731  * string = string.encode!(Encoding::ISO_8859_1)
1732  * #=> "R\xE9sum\xE9"
1733  * string.encoding
1734  * #=> #<Encoding:ISO-8859-1>
1735  *
1736  * == Script encoding
1737  *
1738  * All Ruby script code has an associated Encoding which any String literal
1739  * created in the source code will be associated to.
1740  *
1741  * The default script encoding is <code>Encoding::UTF_8</code> since v2.0, but it can
1742  * be changed by a magic comment on the first line of the source code file (or
1743  * second line, if there is a shebang line on the first). The comment must
1744  * contain the word <code>coding</code> or <code>encoding</code>, followed
1745  * by a colon, space and the Encoding name or alias:
1746  *
1747  * # encoding: UTF-8
1748  *
1749  * "some string".encoding
1750  * #=> #<Encoding:UTF-8>
1751  *
1752  * The <code>__ENCODING__</code> keyword returns the script encoding of the file
1753  * in which the keyword is written:
1754  *
1755  * # encoding: ISO-8859-1
1756  *
1757  * __ENCODING__
1758  * #=> #<Encoding:ISO-8859-1>
1759  *
1760  * <code>ruby -K</code> will change the default locale encoding, but this is
1761  * not recommended. Ruby source files should declare their script encoding by a
1762  * magic comment even when they only depend on US-ASCII strings or regular
1763  * expressions.
1764  *
1765  * == Locale encoding
1766  *
1767  * The default encoding of the environment. Usually derived from locale.
1768  *
1769  * see Encoding.locale_charmap, Encoding.find('locale')
1770  *
1771  * == Filesystem encoding
1772  *
1773  * The default encoding of strings from the filesystem of the environment.
1774  * This is used for strings of file names or paths.
1775  *
1776  * see Encoding.find('filesystem')
1777  *
1778  * == External encoding
1779  *
1780  * Each IO object has an external encoding which indicates the encoding that
1781  * Ruby will use to read its data. By default Ruby sets the external encoding
1782  * of an IO object to the default external encoding. The default external
1783  * encoding is set by locale encoding or the interpreter <code>-E</code> option.
1784  * Encoding.default_external returns the current value of the external
1785  * encoding.
1786  *
1787  * ENV["LANG"]
1788  * #=> "UTF-8"
1789  * Encoding.default_external
1790  * #=> #<Encoding:UTF-8>
1791  *
1792  * $ ruby -E ISO-8859-1 -e "p Encoding.default_external"
1793  * #<Encoding:ISO-8859-1>
1794  *
1795  * $ LANG=C ruby -e 'p Encoding.default_external'
1796  * #<Encoding:US-ASCII>
1797  *
1798  * The default external encoding may also be set through
1799  * Encoding.default_external=, but you should not do this as strings created
1800  * before and after the change will have inconsistent encodings. Instead use
1801  * <code>ruby -E</code> to invoke ruby with the correct external encoding.
1802  *
1803  * When you know that the actual encoding of the data of an IO object is not
1804  * the default external encoding, you can reset its external encoding with
1805  * IO#set_encoding or set it at IO object creation (see IO.new options).
1806  *
1807  * == Internal encoding
1808  *
1809  * To process the data of an IO object which has an encoding different
1810  * from its external encoding, you can set its internal encoding. Ruby will use
1811  * this internal encoding to transcode the data when it is read from the IO
1812  * object.
1813  *
1814  * Conversely, when data is written to the IO object it is transcoded from the
1815  * internal encoding to the external encoding of the IO object.
1816  *
1817  * The internal encoding of an IO object can be set with
1818  * IO#set_encoding or at IO object creation (see IO.new options).
1819  *
1820  * The internal encoding is optional and when not set, the Ruby default
1821  * internal encoding is used. If not explicitly set this default internal
1822  * encoding is +nil+ meaning that by default, no transcoding occurs.
1823  *
1824  * The default internal encoding can be set with the interpreter option
1825  * <code>-E</code>. Encoding.default_internal returns the current internal
1826  * encoding.
1827  *
1828  * $ ruby -e 'p Encoding.default_internal'
1829  * nil
1830  *
1831  * $ ruby -E ISO-8859-1:UTF-8 -e "p [Encoding.default_external, \
1832  * Encoding.default_internal]"
1833  * [#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>]
1834  *
1835  * The default internal encoding may also be set through
1836  * Encoding.default_internal=, but you should not do this as strings created
1837  * before and after the change will have inconsistent encodings. Instead use
1838  * <code>ruby -E</code> to invoke ruby with the correct internal encoding.
1839  *
1840  * == IO encoding example
1841  *
1842  * In the following example a UTF-8 encoded string "R\u00E9sum\u00E9" is transcoded for
1843  * output to ISO-8859-1 encoding, then read back in and transcoded to UTF-8:
1844  *
1845  * string = "R\u00E9sum\u00E9"
1846  *
1847  * open("transcoded.txt", "w:ISO-8859-1") do |io|
1848  * io.write(string)
1849  * end
1850  *
1851  * puts "raw text:"
1852  * p File.binread("transcoded.txt")
1853  * puts
1854  *
1855  * open("transcoded.txt", "r:ISO-8859-1:UTF-8") do |io|
1856  * puts "transcoded text:"
1857  * p io.read
1858  * end
1859  *
1860  * While writing the file, the internal encoding is not specified as it is
1861  * only necessary for reading. While reading the file both the internal and
1862  * external encoding must be specified to obtain the correct result.
1863  *
1864  * $ ruby t.rb
1865  * raw text:
1866  * "R\xE9sum\xE9"
1867  *
1868  * transcoded text:
1869  * "R\u00E9sum\u00E9"
1870  *
1871  */
1872 
/*
 * Init_Encoding: defines the Encoding class and its instance methods, then
 * builds an array holding one Encoding object per enc_table entry.
 * NOTE(review): this extract is missing listing lines 1874 (the
 * declarator), 1882-1883, 1891-1895, 1898, 1900-1904 and 1908-1909, so
 * some registrations and the place where `list` is stored are not visible
 * here. The cross-reference index in this file records the declarator as
 * `void Init_Encoding(void)`.
 */
1873 void
1875 {
 /* From here on, rb_intern(str) expands to rb_intern_const(str). */
1876 #undef rb_intern
1877 #define rb_intern(str) rb_intern_const(str)
1878  VALUE list;
1879  int i;
1880 
1881  rb_cEncoding = rb_define_class("Encoding", rb_cObject);
 /* "to_s" and "name" are both bound to enc_name. */
1884  rb_define_method(rb_cEncoding, "to_s", enc_name, 0);
1885  rb_define_method(rb_cEncoding, "inspect", enc_inspect, 0);
1886  rb_define_method(rb_cEncoding, "name", enc_name, 0);
1887  rb_define_method(rb_cEncoding, "names", enc_names, 0);
1888  rb_define_method(rb_cEncoding, "dummy?", enc_dummy_p, 0);
1889  rb_define_method(rb_cEncoding, "ascii_compatible?", enc_ascii_compatible_p, 0);
1890  rb_define_method(rb_cEncoding, "replicate", enc_replicate, 1);
1896 
1897  rb_define_method(rb_cEncoding, "_dump", enc_dump, -1);
1899 
1905 
 /* Array sized for enc_table.count entries, with its class cleared. */
1906  list = rb_ary_new2(enc_table.count);
1907  RBASIC_CLEAR_CLASS(list);
1910 
 /* Wrap each registered encoding in an Encoding object. */
1911  for (i = 0; i < enc_table.count; ++i) {
1912  rb_ary_push(list, enc_new(enc_table.list[i].enc));
1913  }
1914 }
1915 
1916 /* locale insensitive ctype functions */
1917 
/*
 * ctype_test(c, ctype): true only when rb_isascii(c) holds and the ASCII
 * ctype lookup ONIGENC_IS_ASCII_CODE_CTYPE(c, ctype) succeeds.
 * `c` appears twice in the expansion, so pass an expression without side
 * effects.
 */
1918 #define ctype_test(c, ctype) \
1919  (rb_isascii(c) && ONIGENC_IS_ASCII_CODE_CTYPE((c), (ctype)))
1920 
/*
 * One wrapper per character class. Each returns ctype_test(c, <class>), so
 * the result is false for any c that fails rb_isascii().
 */
1921 int rb_isalnum(int c) { return ctype_test(c, ONIGENC_CTYPE_ALNUM); }
1922 int rb_isalpha(int c) { return ctype_test(c, ONIGENC_CTYPE_ALPHA); }
1923 int rb_isblank(int c) { return ctype_test(c, ONIGENC_CTYPE_BLANK); }
1924 int rb_iscntrl(int c) { return ctype_test(c, ONIGENC_CTYPE_CNTRL); }
1925 int rb_isdigit(int c) { return ctype_test(c, ONIGENC_CTYPE_DIGIT); }
1926 int rb_isgraph(int c) { return ctype_test(c, ONIGENC_CTYPE_GRAPH); }
1927 int rb_islower(int c) { return ctype_test(c, ONIGENC_CTYPE_LOWER); }
1928 int rb_isprint(int c) { return ctype_test(c, ONIGENC_CTYPE_PRINT); }
1929 int rb_ispunct(int c) { return ctype_test(c, ONIGENC_CTYPE_PUNCT); }
1930 int rb_isspace(int c) { return ctype_test(c, ONIGENC_CTYPE_SPACE); }
1931 int rb_isupper(int c) { return ctype_test(c, ONIGENC_CTYPE_UPPER); }
1932 int rb_isxdigit(int c) { return ctype_test(c, ONIGENC_CTYPE_XDIGIT); }
1933 
/*
 * rb_tolower: applies ONIGENC_ASCII_CODE_TO_LOWER_CASE to c when
 * rb_isascii(c) holds; any other c is returned unchanged.
 * NOTE(review): listing line 1935 (the declarator) is absent from this
 * extract; the cross-reference index in this file records it as
 * `int rb_tolower(int c)`.
 */
1934 int
1936 {
1937  return rb_isascii(c) ? ONIGENC_ASCII_CODE_TO_LOWER_CASE(c) : c;
1938 }
1939 
/*
 * rb_toupper: applies ONIGENC_ASCII_CODE_TO_UPPER_CASE to c when
 * rb_isascii(c) holds; any other c is returned unchanged.
 * NOTE(review): listing line 1941 (the declarator) is absent from this
 * extract; the cross-reference index in this file records it as
 * `int rb_toupper(int c)`.
 */
1940 int
1942 {
1943  return rb_isascii(c) ? ONIGENC_ASCII_CODE_TO_UPPER_CASE(c) : c;
1944 }
1945 
/*
 * rb_enc_foreach_name: runs st_foreach() over enc_table.names, passing the
 * caller's `func` and `arg` straight through.
 * NOTE(review): listing line 1947 (the declarator) is absent from this
 * extract; the cross-reference index in this file records it as
 * `void rb_enc_foreach_name(int(*func)(st_data_t name, st_data_t idx, st_data_t arg), st_data_t arg)`.
 */
1946 void
1948 {
1949  st_foreach(enc_table.names, func, arg);
1950 }
static void enc_set_index(VALUE obj, int idx)
Definition: encoding.c:779
#define RBASIC_CLEAR_CLASS(obj)
Definition: internal.h:607
static int rb_enc_name_list_i(st_data_t name, st_data_t idx, st_data_t arg)
Definition: encoding.c:1595
rb_encoding OnigEncodingUS_ASCII
#define ONIGENC_CTYPE_BLANK
Definition: oniguruma.h:196
#define ONIGENC_CTYPE_PUNCT
Definition: oniguruma.h:202
#define ISDIGIT(c)
Definition: ruby.h:1775
int rb_enc_codelen(int c, rb_encoding *enc)
Definition: encoding.c:1014
int rb_enc_get_index(VALUE obj)
Definition: encoding.c:739
#define MBCLEN_CHARFOUND_P(ret)
Definition: encoding.h:139
#define is_obj_encoding(obj)
Definition: encoding.c:78
rb_encoding * rb_enc_check(VALUE str1, VALUE str2)
Definition: encoding.c:838
VALUE rb_ary_entry(VALUE ary, long offset)
Definition: array.c:1171
#define MBCLEN_CHARFOUND_LEN(ret)
Definition: encoding.h:140
void rb_bug(const char *fmt,...)
Definition: error.c:327
#define rb_enc_mbc_to_codepoint(p, e, enc)
Definition: encoding.h:156
void rb_enc_copy(VALUE obj1, VALUE obj2)
Definition: encoding.c:916
#define FALSE
Definition: nkf.h:174
#define RUBY_TYPED_FREE_IMMEDIATELY
Definition: ruby.h:1015
void rb_enc_set_base(const char *name, const char *orig)
Definition: encoding.c:360
size_t strlen(const char *)
#define INT2NUM(x)
Definition: ruby.h:1288
Definition: st.h:69
VALUE rb_id2str(ID id)
Definition: ripper.c:17157
VALUE rb_cEncoding
Definition: encoding.c:37
static VALUE enc_load(VALUE klass, VALUE str)
Definition: encoding.c:1221
#define NUM2INT(x)
Definition: ruby.h:630
int count
Definition: encoding.c:48
void rb_undef_alloc_func(VALUE)
Definition: vm_method.c:518
void rb_define_singleton_method(VALUE obj, const char *name, VALUE(*func)(ANYARGS), int argc)
Defines a singleton method for obj.
Definition: class.c:1655
static int rb_enc_aliases_enc_i(st_data_t name, st_data_t orig, st_data_t arg)
Definition: encoding.c:1627
static VALUE enc_inspect(VALUE self)
Definition: encoding.c:1052
static rb_encoding * set_base_encoding(int index, rb_encoding *base)
Definition: encoding.c:346
void rb_str_fill_terminator(VALUE str, const int termlen)
Definition: string.c:1670
#define rb_usascii_str_new2
Definition: intern.h:846
#define FL_TAINT
Definition: ruby.h:1137
#define CLASS_OF(v)
Definition: ruby.h:440
static VALUE rb_enc_name_list(VALUE klass)
Definition: encoding.c:1619
static VALUE enc_list(VALUE klass)
Definition: encoding.c:1135
static int enc_register_at(int index, const char *name, rb_encoding *encoding)
Definition: encoding.c:257
int rb_toupper(int c)
Definition: encoding.c:1941
#define Qtrue
Definition: ruby.h:426
int st_insert(st_table *, st_data_t, st_data_t)
void Init_Encoding(void)
Definition: encoding.c:1874
#define TypedData_Wrap_Struct(klass, data_type, sval)
Definition: ruby.h:1027
static int str_to_encindex(VALUE enc)
Definition: encoding.c:203
rb_encoding * rb_to_encoding(VALUE enc)
Definition: encoding.c:219
#define ENC_CODERANGE_CLEAR(obj)
Definition: encoding.h:56
VALUE rb_enc_from_encoding(rb_encoding *encoding)
Definition: encoding.c:102
int rb_enc_tolower(int c, rb_encoding *enc)
Definition: encoding.c:1037
VALUE rb_eTypeError
Definition: error.c:548
st_table * names
Definition: encoding.c:50
rb_encoding * rb_default_internal_encoding(void)
Definition: encoding.c:1436
VALUE rb_ary_push(VALUE ary, VALUE item)
Definition: array.c:896
VALUE rb_eEncodingError
Definition: error.c:554
#define SYM2ID(x)
Definition: ruby.h:356
int rb_usascii_encindex(void)
Definition: encoding.c:1266
rb_encoding * rb_enc_compatible(VALUE str1, VALUE str2)
Definition: encoding.c:849
static VALUE enc_names(VALUE self)
Definition: encoding.c:1106
static struct @5 enc_table
static rb_encoding * must_encindex(int index)
Definition: encoding.c:152
VALUE rb_funcall(VALUE, ID, int,...)
Calls a method.
Definition: vm_eval.c:775
int rb_isblank(int c)
Definition: encoding.c:1923
static int enc_table_expand(int newsize)
Definition: encoding.c:241
Definition: nkf.c:115
VALUE rb_protect(VALUE(*proc)(VALUE), VALUE data, int *state)
Definition: eval.c:807
#define ENCODING_GET_INLINED(obj)
Definition: encoding.h:37
int rb_enc_str_coderange(VALUE)
Definition: string.c:435
unsigned int rb_enc_codepoint_len(const char *p, const char *e, int *len_p, rb_encoding *enc)
Definition: encoding.c:993
static VALUE enc_new(rb_encoding *encoding)
Definition: encoding.c:81
void rb_raise(VALUE exc, const char *fmt,...)
Definition: error.c:1854
VALUE rb_ivar_get(VALUE, ID)
Definition: variable.c:1115
unsigned int flags
Definition: oniguruma.h:176
#define ONIGENC_CTYPE_CNTRL
Definition: oniguruma.h:197
void rb_enc_set_default_external(VALUE encoding)
Definition: encoding.c:1403
VALUE rb_enc_associate(VALUE obj, rb_encoding *enc)
Definition: encoding.c:826
int rb_isupper(int c)
Definition: encoding.c:1931
int rb_enc_set_dummy(int index)
Definition: encoding.c:371
static VALUE rb_enc_aliases(VALUE klass)
Definition: encoding.c:1664
static VALUE set_default_external(VALUE klass, VALUE encoding)
Definition: encoding.c:1426
int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:946
int rb_isprint(int c)
Definition: encoding.c:1928
#define DATA_PTR(dta)
Definition: ruby.h:992
VALUE rb_locale_charmap(VALUE klass)
Definition: localeinit.c:23
const char * alias
Definition: nkf.c:1151
#define ENC_SET_DUMMY(enc)
Definition: encoding.h:242
#define ENC_REGISTER(enc)
int rb_enc_registered(const char *name)
Definition: encoding.c:608
ID rb_id_encoding(void)
Definition: encoding.c:732
void Init_w32_codepage(void)
Definition: file.c:702
int rb_isdigit(int c)
Definition: encoding.c:1925
static int enc_alias_internal(const char *alias, int idx)
Definition: encoding.c:512
#define ONIGENC_CTYPE_ALNUM
Definition: oniguruma.h:207
int rb_filesystem_encindex(void)
Definition: encoding.c:1300
void rb_enc_init(void)
Definition: encoding.c:563
rb_encoding * rb_utf8_encoding(void)
Definition: encoding.c:1242
void rb_loaderror(const char *fmt,...)
Definition: error.c:1876
#define RDATA(obj)
Definition: ruby.h:1125
void rb_undef_method(VALUE klass, const char *name)
Definition: class.c:1506
Definition: nkf.c:111
#define ONIGENC_CTYPE_ALPHA
Definition: oniguruma.h:195
#define ENC_CODERANGE_7BIT
Definition: encoding.h:49
VALUE rb_enc_sprintf(rb_encoding *enc, const char *format,...)
Definition: sprintf.c:1231
static VALUE enc_dummy_p(VALUE enc)
Definition: encoding.c:471
#define rb_ary_new2
Definition: intern.h:90
static VALUE rb_enc_from_encoding_index(int idx)
Definition: encoding.c:87
int rb_enc_toupper(int c, rb_encoding *enc)
Definition: encoding.c:1031
RUBY_SYMBOL_EXPORT_BEGIN typedef unsigned long st_data_t
Definition: st.h:20
Definition: nkf.c:87
#define ONIGENC_CTYPE_UPPER
Definition: oniguruma.h:204
int st_insert2(st_table *, st_data_t, st_data_t, st_data_t(*)(st_data_t))
st_table * st_init_strcasetable(void)
Definition: st.c:296
#define ctype_test(c, ctype)
Definition: encoding.c:1918
#define RB_TYPE_P(obj, type)
Definition: ruby.h:1664
void rb_encdb_set_unicode(int index)
Definition: encoding.c:554
int st_lookup(st_table *, st_data_t, st_data_t *)
static int str_find_encindex(VALUE enc)
Definition: encoding.c:190
int rb_to_encoding_index(VALUE enc)
Definition: encoding.c:171
rb_encoding * rb_default_external_encoding(void)
Definition: encoding.c:1351
#define ONIGENC_CTYPE_DIGIT
Definition: oniguruma.h:198
#define ONIGENC_MBC_ENC_LEN(enc, p, e)
Definition: oniguruma.h:261
static void not_encoding(VALUE enc)
Definition: encoding.c:135
VALUE rb_hash_aset(VALUE hash, VALUE key, VALUE val)
Definition: hash.c:1393
int rb_isxdigit(int c)
Definition: encoding.c:1932
int rb_ispunct(int c)
Definition: encoding.c:1929
int rb_enc_fast_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:940
RUBY_EXTERN VALUE rb_cObject
Definition: ruby.h:1553
Definition: encoding.c:40
#define ONIGENC_CTYPE_XDIGIT
Definition: oniguruma.h:205
#define rb_enc_isascii(c, enc)
Definition: encoding.h:178
Definition: nkf.c:99
#define RSTRING_END(str)
Definition: ruby.h:849
int rb_isspace(int c)
Definition: encoding.c:1930
VALUE rb_ary_replace(VALUE copy, VALUE orig)
Definition: array.c:3330
VALUE rb_ary_new(void)
Definition: array.c:495
int rb_ascii8bit_encindex(void)
Definition: encoding.c:1236
VALUE rb_enc_default_external(void)
Definition: encoding.c:1365
#define NIL_P(v)
Definition: ruby.h:438
#define ISASCII(c)
Definition: ruby.h:1766
static VALUE enc_name(VALUE self)
Definition: encoding.c:1079
#define ENC_CODERANGE_ASCIIONLY(obj)
Definition: encoding.h:53
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
Definition: class.c:630
void rb_enc_set_index(VALUE obj, int idx)
Definition: encoding.c:790
int rb_enc_replicate(const char *name, rb_encoding *encoding)
Definition: encoding.c:380
void rb_define_const(VALUE, const char *, VALUE)
Definition: variable.c:2225
void rb_ary_store(VALUE ary, long idx, VALUE val)
Definition: array.c:790
#define ENCODING_COUNT
Definition: encoding.c:55
#define ISALNUM(c)
Definition: ruby.h:1773
static void set_encoding_const(const char *, rb_encoding *)
Definition: encoding.c:1544
static VALUE enc_dump(int argc, VALUE *argv, VALUE self)
Definition: encoding.c:1213
int rb_encdb_alias(const char *alias, const char *orig)
Definition: encoding.c:543
int argc
Definition: ruby.c:131
#define Qfalse
Definition: ruby.h:425
VALUE rb_require_safe(VALUE, int)
Definition: load.c:943
int rb_locale_encindex(void)
Definition: encoding.c:1272
#define realloc
Definition: ripper.c:97
#define ALLOCA_N(type, n)
Definition: ruby.h:1337
static rb_encoding * str_to_encoding(VALUE enc)
Definition: encoding.c:213
NORETURN(static void not_encoding(VALUE enc))
void rb_gc_register_mark_object(VALUE obj)
Definition: gc.c:4920
#define ISUPPER(c)
Definition: ruby.h:1771
#define MEMCPY(p1, p2, type, n)
Definition: ruby.h:1352
#define ENCDB_REGISTER(name, enc)
VALUE rb_enc_associate_index(VALUE obj, int idx)
Definition: encoding.c:798
VALUE rb_eEncCompatError
Definition: error.c:555
static rb_encoding * must_encoding(VALUE enc)
Definition: encoding.c:142
#define OBJ_FREEZE(x)
Definition: ruby.h:1186
#define rb_enc_mbminlen(enc)
Definition: encoding.h:128
#define ISLOWER(c)
Definition: ruby.h:1772
const char * name
Definition: encoding.c:41
#define ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n)
Definition: oniguruma.h:252
int rb_isgraph(int c)
Definition: encoding.c:1926
#define ONIGENC_CODE_TO_MBCLEN(enc, code)
Definition: oniguruma.h:267
static int enc_set_default_encoding(struct default_encoding *def, VALUE encoding, const char *name)
Definition: encoding.c:1324
rb_encoding * rb_find_encoding(VALUE enc)
Definition: encoding.c:226
int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc)
Definition: encoding.c:970
#define RSTRING_LEN(str)
Definition: ruby.h:841
int rb_encdb_dummy(const char *name)
Definition: encoding.c:447
static int enc_register(const char *name, rb_encoding *encoding)
Definition: encoding.c:291
SSL_METHOD *(* func)(void)
Definition: ossl_ssl.c:113
#define TRUE
Definition: nkf.h:175
#define T_DATA
Definition: ruby.h:492
static int enc_check_encoding(VALUE obj)
Definition: encoding.c:125
VALUE rb_sprintf(const char *format,...)
Definition: sprintf.c:1250
int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:958
int rb_enc_unicode_p(rb_encoding *enc)
Definition: encoding.c:496
#define rb_enc_name(enc)
Definition: encoding.h:125
VALUE rb_hash_new(void)
Definition: hash.c:298
#define strdup(s)
Definition: util.h:67
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Definition: class.c:1728
#define ONIGENC_ASCII_CODE_TO_UPPER_CASE(c)
Definition: regenc.h:187
VALUE rb_ivar_set(VALUE, ID, VALUE)
Definition: variable.c:1133
#define PRIsVALUE
Definition: ruby.h:137
#define ONIGENC_CTYPE_PRINT
Definition: oniguruma.h:201
unsigned long ID
Definition: ruby.h:89
rb_encoding * rb_usascii_encoding(void)
Definition: encoding.c:1257
int rb_encdb_replicate(const char *name, const char *orig)
Definition: encoding.c:425
#define ENCODING_NAMELEN_MAX
Definition: encoding.c:58
#define Qnil
Definition: ruby.h:427
int rb_define_dummy_encoding(const char *name)
Definition: encoding.c:437
static struct default_encoding default_internal
Definition: encoding.c:1433
const char * name
Definition: oniguruma.h:160
#define BUILTIN_TYPE(x)
Definition: ruby.h:502
#define debug(x)
Definition: _sdbm.c:52
unsigned long VALUE
Definition: ruby.h:88
static VALUE enc_compatible_p(VALUE klass, VALUE str1, VALUE str2)
Definition: encoding.c:1200
rb_encoding * rb_locale_encoding(void)
Definition: encoding.c:1294
#define ONIGENC_IS_ASCII_CODE(code)
Definition: regenc.h:185
int rb_utf8_encindex(void)
Definition: encoding.c:1251
#define ENCODING_SET_INLINED(obj, i)
Definition: encoding.h:31
int rb_enc_code_to_mbclen(int code, rb_encoding *enc)
Definition: encoding.c:1025
VALUE rb_obj_encoding(VALUE obj)
Definition: encoding.c:930
static int rb_enc_dummy_p(rb_encoding *enc)
Definition: encoding.h:245
static int enc_autoload(rb_encoding *)
Definition: encoding.c:659
#define rb_enc_asciicompat(enc)
Definition: encoding.h:188
#define ONIGENC_CTYPE_SPACE
Definition: oniguruma.h:203
int rb_islower(int c)
Definition: encoding.c:1927
void rb_enc_foreach_name(int(*func)(st_data_t name, st_data_t idx, st_data_t arg), st_data_t arg)
Definition: encoding.c:1947
static VALUE set_default_internal(VALUE klass, VALUE encoding)
Definition: encoding.c:1508
#define ONIGENC_IS_UNICODE(enc)
Definition: oniguruma.h:227
#define enc_autoload_p(enc)
Definition: encoding.c:61
#define FL_UNSET(x, f)
Definition: ruby.h:1173
#define UChar
Definition: oniguruma.h:108
static void enc_check_duplication(const char *name)
Definition: encoding.c:338
#define StringValueCStr(v)
Definition: ruby.h:541
static size_t enc_memsize(const void *p)
Definition: encoding.c:66
#define RSTRING_PTR(str)
Definition: ruby.h:845
static ID id_encoding
Definition: encoding.c:36
static int enc_names_i(st_data_t name, st_data_t idx, st_data_t args)
Definition: encoding.c:1085
rb_encoding * rb_enc_get(VALUE obj)
Definition: encoding.c:832
static VALUE get_default_external(VALUE klass)
Definition: encoding.c:1397
int size
Definition: encoding.c:49
static struct default_encoding default_external
Definition: encoding.c:1319
#define ONIGENC_FLAG_UNICODE
Definition: oniguruma.h:213
#define ONIGENC_PRECISE_MBC_ENC_LEN(enc, p, e)
Definition: oniguruma.h:256
#define ENCODING_INLINE_MAX
Definition: encoding.h:27
#define xmalloc
Definition: defines.h:108
void rb_set_errinfo(VALUE err)
Definition: eval.c:1504
static VALUE enc_replicate(VALUE encoding, VALUE name)
Definition: encoding.c:401
rb_encoding * enc
Definition: encoding.c:1316
VALUE rb_check_string_type(VALUE)
Definition: string.c:1679
void rb_enc_set_default_internal(VALUE encoding)
Definition: encoding.c:1488
static VALUE enc_ascii_compatible_p(VALUE enc)
Definition: encoding.c:487
uint8_t key[16]
Definition: random.c:1250
#define valid_encoding_name_p(name)
Definition: encoding.c:59
#define T_STRING
Definition: ruby.h:482
int rb_enc_alias(const char *alias, const char *orig)
Definition: encoding.c:528
static VALUE require_enc(VALUE enclib)
Definition: encoding.c:621
#define is_data_encoding(obj)
Definition: encoding.c:77
struct rb_encoding_entry * list
Definition: encoding.c:47
rb_encoding * rb_filesystem_encoding(void)
Definition: encoding.c:1309
static int enc_capable(VALUE obj)
Definition: encoding.c:716
rb_encoding * rb_enc_get_from_index(int index)
Definition: encoding.c:602
#define T_FILE
Definition: ruby.h:488
Definition: nkf.c:112
#define ENC_INDEX_MASK
Definition: encoding.h:237
static const rb_data_type_t encoding_data_type
Definition: encoding.c:71
#define rb_isascii(c)
Definition: ruby.h:1749
#define TOLOWER(c)
Definition: ruby.h:1779
static st_data_t enc_dup_name(st_data_t name)
Definition: encoding.c:502
int rb_isalnum(int c)
Definition: encoding.c:1921
VALUE rb_enc_default_internal(void)
Definition: encoding.c:1445
static int check_encoding(rb_encoding *enc)
Definition: encoding.c:113
static VALUE get_default_internal(VALUE klass)
Definition: encoding.c:1482
#define rb_safe_level()
Definition: tcltklib.c:95
int rb_tolower(int c)
Definition: encoding.c:1935
Definition: nkf.c:113
#define ruby_debug
Definition: ruby.h:1476
#define ENC_TO_ENCINDEX(enc)
Definition: encoding.h:239
const char * name
Definition: nkf.c:208
static int enc_replicate_with_index(const char *name, rb_encoding *origenc, int idx)
Definition: encoding.c:409
#define rb_errinfo()
Definition: tcltklib.c:90
unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:1008
#define STRCASECMP(s1, s2)
Definition: ruby.h:1783
#define rb_intern(str)
#define rb_enc_to_index(enc)
Definition: encoding.h:77
static int enc_alias(const char *alias, int idx)
Definition: encoding.c:519
rb_encoding * rb_ascii8bit_encoding(void)
Definition: encoding.c:1227
void rb_warning(const char *fmt,...)
Definition: error.c:236
int rb_enc_find_index(const char *name)
Definition: encoding.c:684
int rb_iscntrl(int c)
Definition: encoding.c:1924
#define rb_check_frozen(obj)
Definition: intern.h:277
#define CONST_ID(var, str)
Definition: ruby.h:1428
int rb_enc_register(const char *name, rb_encoding *encoding)
Definition: encoding.c:304
#define SPECIAL_CONST_P(x)
Definition: ruby.h:1165
Definition: nkf.c:108
static VALUE rb_encoding_list
Definition: encoding.c:38
#define ONIGENC_CTYPE_GRAPH
Definition: oniguruma.h:199
void rb_encdb_declare(const char *name)
Definition: encoding.c:328
int rb_enc_str_asciionly_p(VALUE)
Definition: string.c:448
void rb_gc_mark_encodings(void)
Definition: encoding.c:236
#define SYMBOL_P(x)
Definition: ruby.h:354
#define NULL
Definition: _sdbm.c:103
#define UNSPECIFIED_ENCODING
Definition: encoding.c:56
rb_encoding OnigEncodingUTF_8
void rb_define_method(VALUE klass, const char *name, VALUE(*func)(ANYARGS), int argc)
Definition: class.c:1488
#define ruby_verbose
Definition: ruby.h:1475
int st_foreach(st_table *, int(*)(ANYARGS), st_data_t)
Definition: st.c:1034
Definition: nkf.c:118
#define ONIGENC_CTYPE_LOWER
Definition: oniguruma.h:200
void rb_warn(const char *fmt,...)
Definition: error.c:223
rb_encoding * enc
Definition: encoding.c:42
VALUE rb_eArgError
Definition: error.c:549
static int load_encoding(const char *name)
Definition: encoding.c:628
rb_encoding * rb_enc_find(const char *name)
Definition: encoding.c:708
#define T_REGEXP
Definition: ruby.h:483
int Init_enc_set_filesystem_encoding(void)
Definition: localeinit.c:51
char ** argv
Definition: ruby.c:132
#define StringValue(v)
Definition: ruby.h:539
static VALUE enc_find(VALUE klass, VALUE enc)
Definition: encoding.c:1165
#define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c)
Definition: regenc.h:186
#define ENC_DUMMY_P(enc)
Definition: encoding.h:241
int rb_isalpha(int c)
Definition: encoding.c:1922
rb_encoding * rb_enc_from_index(int index)
Definition: encoding.c:590
Definition: nkf.c:117
Definition: nkf.c:120
VALUE rb_obj_class(VALUE)
Definition: object.c:227
rb_encoding * base
Definition: encoding.c:43