Skip to content

Commit cb03bf4

Browse files
committed
Freeze all instances of Regexp, except subclass instances
* Instances of a subclass of Regexp are not frozen for compatibility. * [Feature ruby#8948] * Use a less confusing example in test_regexp2, the ivar was named @encoding but had no effect on Regexp#encoding.
1 parent be659a0 commit cb03bf4

16 files changed

Lines changed: 175 additions & 57 deletions

File tree

NEWS.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,12 @@ Note: We're only listing outstanding class updates.
4242
* `Array#pack` accepts a new format `R` and `r` for unpacking unsigned
4343
and signed LEB128 encoded integers. [[Feature #21785]]
4444

45+
* Regexp
46+
47+
* All instances of `Regexp` are now frozen, not just literals.
48+
Subclasses of `Regexp` are not frozen for compatibility.
49+
[[Feature #8948]]
50+
4551
* Set
4652

4753
* A deprecated behavior, `Set#to_set`, `Range#to_set`, and
@@ -140,6 +146,7 @@ A lot of work has gone into making Ractors more stable, performant, and usable.
140146
## JIT
141147

142148
[Feature #6012]: https://bugs.ruby-lang.org/issues/6012
149+
[Feature #8948]: https://bugs.ruby-lang.org/issues/8948
143150
[Feature #15330]: https://bugs.ruby-lang.org/issues/15330
144151
[Feature #21390]: https://bugs.ruby-lang.org/issues/21390
145152
[Feature #21785]: https://bugs.ruby-lang.org/issues/21785

depend

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8862,6 +8862,7 @@ marshal.$(OBJEXT): $(top_srcdir)/internal/hash.h
88628862
marshal.$(OBJEXT): $(top_srcdir)/internal/imemo.h
88638863
marshal.$(OBJEXT): $(top_srcdir)/internal/numeric.h
88648864
marshal.$(OBJEXT): $(top_srcdir)/internal/object.h
8865+
marshal.$(OBJEXT): $(top_srcdir)/internal/re.h
88658866
marshal.$(OBJEXT): $(top_srcdir)/internal/sanitizers.h
88668867
marshal.$(OBJEXT): $(top_srcdir)/internal/serial.h
88678868
marshal.$(OBJEXT): $(top_srcdir)/internal/set_table.h

internal/re.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "ruby/ruby.h" /* for VALUE */
1313

1414
/* re.c */
15+
VALUE rb_reg_s_alloc(VALUE klass);
1516
VALUE rb_reg_compile(VALUE str, int options, const char *sourcefile, int sourceline);
1617
VALUE rb_reg_check_preprocess(VALUE);
1718
long rb_reg_search0(VALUE, VALUE, long, int, int, VALUE *);

marshal.c

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include "internal/hash.h"
3131
#include "internal/numeric.h"
3232
#include "internal/object.h"
33+
#include "internal/re.h"
3334
#include "internal/struct.h"
3435
#include "internal/symbol.h"
3536
#include "internal/util.h"
@@ -1731,7 +1732,10 @@ r_ivar_encoding(VALUE obj, struct load_arg *arg, VALUE sym, VALUE val)
17311732
int idx = sym2encidx(sym, val);
17321733
if (idx >= 0) {
17331734
if (rb_enc_capable(obj)) {
1734-
rb_enc_associate_index(obj, idx);
1735+
// Check if needed to avoid rb_check_frozen() check for Regexps
1736+
if (rb_enc_get_index(obj) != idx) {
1737+
rb_enc_associate_index(obj, idx);
1738+
}
17351739
}
17361740
else {
17371741
rb_raise(rb_eArgError, "%"PRIsVALUE" is not enc_capable", obj);
@@ -1854,17 +1858,17 @@ append_extmod(VALUE obj, VALUE extmod)
18541858
override_ivar_error(type, str); \
18551859
} while (0)
18561860

1857-
static VALUE r_object_for(struct load_arg *arg, bool partial, int *ivp, VALUE extmod, int type);
1861+
static VALUE r_object_for(struct load_arg *arg, bool partial, int *ivp, VALUE klass, VALUE extmod, int type);
18581862

18591863
static VALUE
18601864
r_object0(struct load_arg *arg, bool partial, int *ivp, VALUE extmod)
18611865
{
18621866
int type = r_byte(arg);
1863-
return r_object_for(arg, partial, ivp, extmod, type);
1867+
return r_object_for(arg, partial, ivp, 0, extmod, type);
18641868
}
18651869

18661870
static VALUE
1867-
r_object_for(struct load_arg *arg, bool partial, int *ivp, VALUE extmod, int type)
1871+
r_object_for(struct load_arg *arg, bool partial, int *ivp, VALUE klass, VALUE extmod, int type)
18681872
{
18691873
VALUE (*hash_new_with_size)(st_index_t) = rb_hash_new_with_size;
18701874
VALUE v = Qnil;
@@ -1940,12 +1944,12 @@ r_object_for(struct load_arg *arg, bool partial, int *ivp, VALUE extmod, int typ
19401944
}
19411945
type = r_byte(arg);
19421946
if ((c == rb_cHash) &&
1943-
/* Hack for compare_by_identify */
1947+
/* Hack for compare_by_identity */
19441948
(type == TYPE_HASH || type == TYPE_HASH_DEF)) {
19451949
hash_new_with_size = rb_ident_hash_new_with_size;
19461950
goto type_hash;
19471951
}
1948-
v = r_object_for(arg, partial, 0, extmod, type);
1952+
v = r_object_for(arg, partial, 0, c, extmod, type);
19491953
if (RB_SPECIAL_CONST_P(v) || RB_TYPE_P(v, T_OBJECT) || RB_TYPE_P(v, T_CLASS)) {
19501954
goto format_error;
19511955
}
@@ -2082,7 +2086,10 @@ r_object_for(struct load_arg *arg, bool partial, int *ivp, VALUE extmod, int typ
20822086
}
20832087
rb_str_set_len(str, dst - ptr);
20842088
}
2085-
VALUE regexp = rb_reg_new_str(str, options);
2089+
if (!klass) {
2090+
klass = rb_cRegexp;
2091+
}
2092+
VALUE regexp = rb_reg_init_str(rb_reg_s_alloc(klass), str, options);
20862093
r_copy_ivar(regexp, str);
20872094

20882095
v = r_entry0(regexp, idx, arg);

re.c

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3368,6 +3368,9 @@ rb_reg_initialize(VALUE obj, const char *s, long len, rb_encoding *enc,
33683368
options & ARG_REG_OPTION_MASK, err,
33693369
sourcefile, sourceline);
33703370
if (!re->ptr) return -1;
3371+
if (RBASIC_CLASS(obj) == rb_cRegexp) {
3372+
OBJ_FREEZE(obj);
3373+
}
33713374
RB_GC_GUARD(unescaped);
33723375
return 0;
33733376
}
@@ -3407,7 +3410,7 @@ rb_reg_initialize_str(VALUE obj, VALUE str, int options, onig_errmsg_buffer err,
34073410
return ret;
34083411
}
34093412

3410-
static VALUE
3413+
VALUE
34113414
rb_reg_s_alloc(VALUE klass)
34123415
{
34133416
NEWOBJ_OF(re, struct RRegexp, klass, T_REGEXP | (RGENGC_WB_PROTECTED_REGEXP ? FL_WB_PROTECTED : 0), sizeof(struct RRegexp), 0);
@@ -3460,9 +3463,7 @@ rb_reg_init_str_enc(VALUE re, VALUE s, rb_encoding *enc, int options)
34603463
VALUE
34613464
rb_reg_new_ary(VALUE ary, int opt)
34623465
{
3463-
VALUE re = rb_reg_new_str(rb_reg_preprocess_dregexp(ary, opt), opt);
3464-
rb_obj_freeze(re);
3465-
return re;
3466+
return rb_reg_new_str(rb_reg_preprocess_dregexp(ary, opt), opt);
34663467
}
34673468

34683469
VALUE
@@ -3496,7 +3497,6 @@ rb_reg_compile(VALUE str, int options, const char *sourcefile, int sourceline)
34963497
rb_set_errinfo(rb_reg_error_desc(str, options, err));
34973498
return Qnil;
34983499
}
3499-
rb_obj_freeze(re);
35003500
return re;
35013501
}
35023502

@@ -4033,6 +4033,9 @@ reg_copy(VALUE copy, VALUE orig)
40334033
RREGEXP_PTR(copy)->timelimit = RREGEXP_PTR(orig)->timelimit;
40344034
rb_enc_copy(copy, orig);
40354035
FL_SET_RAW(copy, FL_TEST_RAW(orig, KCODE_FIXED|REG_ENCODING_NONE));
4036+
if (RBASIC_CLASS(copy) == rb_cRegexp) {
4037+
OBJ_FREEZE(copy);
4038+
}
40364039

40374040
return copy;
40384041
}
@@ -4115,6 +4118,9 @@ rb_reg_initialize_m(int argc, VALUE *argv, VALUE self)
41154118
}
41164119

41174120
set_timeout(&RREGEXP_PTR(self)->timelimit, args.timeout);
4121+
if (RBASIC_CLASS(self) == rb_cRegexp) {
4122+
OBJ_FREEZE(self);
4123+
}
41184124

41194125
return self;
41204126
}

spec/ruby/core/marshal/dump_spec.rb

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -473,14 +473,26 @@ def _dump(level)
473473
Marshal.dump(//im).should == "\x04\bI/\x00\x05\x06:\x06EF"
474474
end
475475

476-
it "dumps a Regexp with instance variables" do
477-
o = Regexp.new("")
476+
it "dumps a Regexp subclass with instance variables" do
477+
o = UserRegexp.new("")
478478
o.instance_variable_set(:@ivar, :ivar)
479-
Marshal.dump(o).should == "\x04\bI/\x00\x00\a:\x06EF:\n@ivar:\tivar"
479+
Marshal.dump(o).should == "\x04\bIC:\x0FUserRegexp/\x00\x00\a:\x06EF:\n@ivar:\tivar"
480480
end
481481

482-
it "dumps an extended Regexp" do
483-
Marshal.dump(Regexp.new("").extend(Meths)).should == "\x04\bIe:\nMeths/\x00\x00\x06:\x06EF"
482+
it "dumps an extended Regexp subclass" do
483+
Marshal.dump(UserRegexp.new("").extend(Meths)).should == "\x04\bIe:\nMethsC:\x0FUserRegexp/\x00\x00\x06:\x06EF"
484+
end
485+
486+
ruby_version_is ""..."4.1" do
487+
it "dumps a Regexp with instance variables" do
488+
o = Regexp.new("")
489+
o.instance_variable_set(:@ivar, :ivar)
490+
Marshal.dump(o).should == "\x04\bI/\x00\x00\a:\x06EF:\n@ivar:\tivar"
491+
end
492+
493+
it "dumps an extended Regexp" do
494+
Marshal.dump(Regexp.new("").extend(Meths)).should == "\x04\bIe:\nMeths/\x00\x00\x06:\x06EF"
495+
end
484496
end
485497

486498
it "dumps a Regexp subclass" do

spec/ruby/core/marshal/shared/load.rb

Lines changed: 43 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -901,14 +901,52 @@ def io.binmode; raise "binmode"; end
901901
end
902902

903903
describe "for a Regexp" do
904-
it "loads an extended Regexp" do
905-
obj = /[a-z]/.dup.extend(Meths, MethsMore)
906-
new_obj = Marshal.send(@method, "\004\be:\nMethse:\016MethsMore/\n[a-z]\000")
904+
ruby_version_is "4.1" do
905+
it "raises FrozenError for an extended Regexp" do
906+
-> {
907+
Marshal.send(@method, "\004\be:\nMethse:\016MethsMore/\n[a-z]\000")
908+
}.should raise_error(FrozenError)
909+
end
910+
911+
it "raises FrozenError when regexp has instance variables" do
912+
-> {
913+
Marshal.send(@method, "\x04\bI/\nhello\x00\a:\x06EF:\x11@regexp_ivar[\x06i/")
914+
}.should raise_error(FrozenError)
915+
end
916+
end
917+
918+
ruby_version_is ""..."4.1" do
919+
it "loads an extended Regexp" do
920+
obj = /[a-z]/.dup.extend(Meths, MethsMore)
921+
new_obj = Marshal.send(@method, "\004\be:\nMethse:\016MethsMore/\n[a-z]\000")
922+
923+
new_obj.should == obj
924+
new_obj_metaclass_ancestors = class << new_obj; ancestors; end
925+
new_obj_metaclass_ancestors[@num_self_class, 3].should ==
926+
[Meths, MethsMore, Regexp]
927+
end
928+
929+
it "restore the regexp instance variables" do
930+
obj = Regexp.new("hello")
931+
obj.instance_variable_set(:@regexp_ivar, [42])
932+
933+
new_obj = Marshal.send(@method, "\x04\bI/\nhello\x00\a:\x06EF:\x11@regexp_ivar[\x06i/")
934+
new_obj.instance_variables.should == [:@regexp_ivar]
935+
new_obj.instance_variable_get(:@regexp_ivar).should == [42]
936+
end
937+
end
938+
939+
it "loads a Regexp subclass instance variables" do
940+
obj = UserRegexp.new('abc')
941+
obj.instance_variable_set(:@noise, 'much')
942+
943+
new_obj = Marshal.send(@method, Marshal.dump(obj))
907944

908945
new_obj.should == obj
946+
new_obj.instance_variable_get(:@noise).should == 'much'
909947
new_obj_metaclass_ancestors = class << new_obj; ancestors; end
910-
new_obj_metaclass_ancestors[@num_self_class, 3].should ==
911-
[Meths, MethsMore, Regexp]
948+
new_obj_metaclass_ancestors[@num_self_class, 2].should ==
949+
[UserRegexp, Regexp]
912950
end
913951

914952
it "loads a Regexp subclass instance variables when it is extended with a module" do
@@ -924,15 +962,6 @@ def io.binmode; raise "binmode"; end
924962
[Meths, UserRegexp, Regexp]
925963
end
926964

927-
it "restore the regexp instance variables" do
928-
obj = Regexp.new("hello")
929-
obj.instance_variable_set(:@regexp_ivar, [42])
930-
931-
new_obj = Marshal.send(@method, "\x04\bI/\nhello\x00\a:\x06EF:\x11@regexp_ivar[\x06i/")
932-
new_obj.instance_variables.should == [:@regexp_ivar]
933-
new_obj.instance_variable_get(:@regexp_ivar).should == [42]
934-
end
935-
936965
it "preserves Regexp encoding" do
937966
source_object = Regexp.new("a".encode("utf-32le"))
938967
regexp = Marshal.send(@method, Marshal.dump(source_object))

spec/ruby/core/regexp/initialize_spec.rb

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,21 @@
99
-> { //.send(:initialize, "") }.should raise_error(FrozenError)
1010
end
1111

12-
it "raises a TypeError on an initialized non-literal Regexp" do
13-
-> { Regexp.new("").send(:initialize, "") }.should raise_error(TypeError)
12+
ruby_version_is "4.1" do
13+
it "raises a FrozenError on an initialized non-literal Regexp" do
14+
regexp = Regexp.new("")
15+
-> { regexp.send(:initialize, "") }.should raise_error(FrozenError)
16+
end
17+
end
18+
19+
ruby_version_is ""..."4.1" do
20+
it "raises a TypeError on an initialized non-literal Regexp" do
21+
-> { Regexp.new("").send(:initialize, "") }.should raise_error(TypeError)
22+
end
23+
end
24+
25+
it "raises a TypeError on an initialized non-literal Regexp subclass" do
26+
r = Class.new(Regexp).new("")
27+
-> { r.send(:initialize, "") }.should raise_error(TypeError)
1428
end
1529
end

spec/ruby/core/regexp/shared/new.rb

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,12 @@
55
Regexp.send(@method, '').is_a?(Regexp).should == true
66
end
77

8+
ruby_version_is "4.1" do
9+
it "is frozen" do
10+
Regexp.send(@method, '').should.frozen?
11+
end
12+
end
13+
814
it "works by default for subclasses with overridden #initialize" do
915
class RegexpSpecsSubclass < Regexp
1016
def initialize(*args)

spec/ruby/core/string/match_spec.rb

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -137,9 +137,17 @@ def obj.method_missing(*args) "." end
137137
end
138138

139139
it "calls match on the regular expression" do
140-
regexp = /./.dup
141-
regexp.should_receive(:match).and_return(:foo)
142-
'hello'.match(regexp).should == :foo
140+
# Can't use regexp.should_receive(:match).and_return(:foo) since regexps are frozen
141+
ScratchPad.clear
142+
regexp = Class.new(Regexp) {
143+
def match(*args)
144+
ScratchPad.record [:match, *args]
145+
super(*args)
146+
end
147+
}.new('.')
148+
149+
'hello'.match(regexp)
150+
ScratchPad.recorded.should == [:match, 'hello']
143151
end
144152
end
145153

0 commit comments

Comments
 (0)