Skip to content

Commit 53fe993

Browse files
committed
Optimize File.extname for common encodings
Similar optimizations to the ones performed in ruby GH-15907.

- Skip the expensive multi-byte encoding handling for the common encodings that are known to be safe.
- Use `CheckPath` to save on copying the argument and to scan it for NUL bytes only once.
- Create the return string with `rb_enc_str_new` instead of `rb_str_subseq`, as it's going to be a very small string anyway.

This could be optimized a little bit further by searching for both `.` and `dirsep` in one pass.

```
compare-ruby: ruby 4.1.0dev (2026-01-19T03:51:30Z master 631bf19) +PRISM [arm64-darwin25]
built-ruby: ruby 4.1.0dev (2026-01-20T07:33:42Z master 6fb5043) +PRISM [arm64-darwin25]
```

|           |compare-ruby|built-ruby|
|:----------|-----------:|---------:|
|long       |      3.606M|   22.229M|
|           |           -|     6.17x|
|long_name  |      2.254M|   13.416M|
|           |           -|     5.95x|
|short      |     16.488M|   29.969M|
|           |           -|     1.82x|
1 parent 6fb5043 commit 53fe993

2 files changed

Lines changed: 40 additions & 27 deletions

File tree

benchmark/file_extname.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
prelude: |
2+
# frozen_string_literal: true
3+
benchmark:
4+
long: File.extname("/Users/george/src/github.com/ruby/ruby/benchmark/file_dirname.yml")
5+
long_name: File.extname("Users_george_src_github.com_ruby_ruby_benchmark_file_dirname.yml")
6+
short: File.extname("foo/bar")

file.c

Lines changed: 34 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -5126,24 +5126,12 @@ rb_file_dirname_n(VALUE fname, int n)
51265126
return dirname;
51275127
}
51285128

5129-
/*
5130-
* accept a String, and return the pointer of the extension.
5131-
* if len is passed, set the length of extension to it.
5132-
* returned pointer is in ``name'' or NULL.
5133-
* returns *len
5134-
* no dot NULL 0
5135-
* dotfile top 0
5136-
* end with dot dot 1
5137-
* .ext dot len of .ext
5138-
* .ext:stream dot len of .ext without :stream (NTFS only)
5139-
*
5140-
*/
5141-
const char *
5142-
ruby_enc_find_extname(const char *name, long *len, rb_encoding *enc)
5129+
static inline const char *
5130+
enc_find_extname(const char *name, long *len, bool mb_enc, rb_encoding *enc)
51435131
{
51445132
const char *p, *e, *end = name + (len ? *len : (long)strlen(name));
51455133

5146-
p = strrdirsep(name, end, true, enc); /* get the last path component */
5134+
p = strrdirsep(name, end, mb_enc, enc); /* get the last path component */
51475135
if (!p)
51485136
p = name;
51495137
else
@@ -5176,7 +5164,7 @@ ruby_enc_find_extname(const char *name, long *len, rb_encoding *enc)
51765164
#endif
51775165
else if (isdirsep(*p))
51785166
break;
5179-
Inc(p, end, true, enc);
5167+
Inc(p, end, mb_enc, enc);
51805168
}
51815169

51825170
if (len) {
@@ -5191,6 +5179,24 @@ ruby_enc_find_extname(const char *name, long *len, rb_encoding *enc)
51915179
return e;
51925180
}
51935181

5182+
/*
5183+
* accept a String, and return the pointer of the extension.
5184+
* if len is passed, set the length of extension to it.
5185+
* returned pointer is in ``name'' or NULL.
5186+
* returns *len
5187+
* no dot NULL 0
5188+
* dotfile top 0
5189+
* end with dot dot 1
5190+
* .ext dot len of .ext
5191+
* .ext:stream dot len of .ext without :stream (NTFS only)
5192+
*
5193+
*/
5194+
const char *
5195+
ruby_enc_find_extname(const char *name, long *len, rb_encoding *enc)
5196+
{
5197+
return enc_find_extname(name, len, true, enc);
5198+
}
5199+
51945200
/*
51955201
* call-seq:
51965202
* File.extname(path) -> string
@@ -5220,18 +5226,19 @@ ruby_enc_find_extname(const char *name, long *len, rb_encoding *enc)
52205226
static VALUE
52215227
rb_file_s_extname(VALUE klass, VALUE fname)
52225228
{
5223-
const char *name, *e;
5224-
long len;
5225-
VALUE extname;
5229+
const char *name;
5230+
CheckPath(fname, name);
5231+
long len = RSTRING_LEN(fname);
52265232

5227-
FilePathStringValue(fname);
5228-
name = StringValueCStr(fname);
5229-
len = RSTRING_LEN(fname);
5230-
e = ruby_enc_find_extname(name, &len, rb_enc_get(fname));
5231-
if (len < 1)
5232-
return rb_str_new(0, 0);
5233-
extname = rb_str_subseq(fname, e - name, len); /* keep the dot, too! */
5234-
return extname;
5233+
if (len < 1) {
5234+
return rb_enc_str_new(0, 0, rb_str_enc_get(fname));
5235+
}
5236+
5237+
bool mb_enc = !rb_str_enc_fastpath(fname);
5238+
rb_encoding *enc = rb_str_enc_get(fname);
5239+
5240+
const char *ext = enc_find_extname(name, &len, mb_enc, enc);
5241+
return rb_enc_str_new(ext, len, enc);
52355242
}
52365243

52375244
/*

0 commit comments

Comments
 (0)