Skip to content

Commit 11d29d3

Browse files
committed
file.c: strrdirsep search from the back of the string for common encodings
`strrdirsep` quite inefficiently searches for the last separator from the front of the string. This is surprising but necessary because in Shift_JIS, `0x5c` can be the second byte of some multi-byte characters, so it is not possible to do a pure ASCII search. It is even more costly because for each character we need to do expensive checks to handle this possibility.

However, in the overwhelming majority of cases, paths are encoded in UTF-8 or ASCII, so for these common encodings we can use the more logical and efficient algorithm: searching from the back of the string.

```
compare-ruby: ruby 4.1.0dev (2026-01-17T14:40:03Z master 00a3b71) +PRISM [arm64-darwin25]
built-ruby: ruby 4.1.0dev (2026-01-19T07:43:57Z file-dirname-lower.. a8d3535e5b) +PRISM [arm64-darwin25]
```

|       |compare-ruby|built-ruby|
|:------|-----------:|---------:|
|long   |      3.974M|   23.674M|
|       |           -|     5.96x|
|short  |     15.281M|   29.034M|
|       |           -|     1.90x|
1 parent 27bb162 commit 11d29d3

1 file changed

Lines changed: 44 additions & 6 deletions

File tree

file.c

Lines changed: 44 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3693,7 +3693,6 @@ skipprefixroot(const char *path, const char *end, rb_encoding *enc)
36933693
#endif
36943694
}
36953695

3696-
#define strrdirsep rb_enc_path_last_separator
36973696
char *
36983697
rb_enc_path_last_separator(const char *path, const char *end, rb_encoding *enc)
36993698
{
@@ -3712,6 +3711,30 @@ rb_enc_path_last_separator(const char *path, const char *end, rb_encoding *enc)
37123711
return last;
37133712
}
37143713

3714+
static inline char *
3715+
strrdirsep(const char *path, const char *end, rb_encoding *enc)
3716+
{
3717+
if (RB_LIKELY(enc == NULL)) {
3718+
const char *cursor = end - 1;
3719+
3720+
while (isdirsep(cursor[0])) {
3721+
cursor--;
3722+
}
3723+
3724+
while (cursor >= path) {
3725+
if (isdirsep(cursor[0])) {
3726+
while (cursor > path && isdirsep(cursor[-1])) {
3727+
cursor--;
3728+
}
3729+
return (char *)cursor;
3730+
}
3731+
cursor--;
3732+
}
3733+
return NULL;
3734+
}
3735+
return rb_enc_path_last_separator(path, end, enc);
3736+
}
3737+
37153738
static char *
37163739
chompdirsep(const char *path, const char *end, rb_encoding *enc)
37173740
{
@@ -5036,6 +5059,15 @@ rb_file_dirname(VALUE fname)
50365059
return rb_file_dirname_n(fname, 1);
50375060
}
50385061

5062+
static inline rb_encoding *
5063+
path_enc_get(VALUE str)
5064+
{
5065+
if (RB_LIKELY(rb_str_enc_fastpath(str))) {
5066+
return NULL;
5067+
}
5068+
return rb_str_enc_get(str);
5069+
}
5070+
50395071
static VALUE
50405072
rb_file_dirname_n(VALUE fname, int n)
50415073
{
@@ -5048,7 +5080,7 @@ rb_file_dirname_n(VALUE fname, int n)
50485080
if (n < 0) rb_raise(rb_eArgError, "negative level: %d", n);
50495081
CheckPath(fname, name);
50505082
end = name + RSTRING_LEN(fname);
5051-
enc = rb_str_enc_get(fname);
5083+
enc = path_enc_get(fname);
50525084
root = skiproot(name, end, enc);
50535085
#ifdef DOSISH_UNC
50545086
if (root > name + 1 && isdirsep(*name))
@@ -5082,26 +5114,32 @@ rb_file_dirname_n(VALUE fname, int n)
50825114
if (i == n) i = 0;
50835115
}
50845116
else {
5085-
Inc(p, end, enc);
5117+
if (RB_UNLIKELY(enc)) {
5118+
Inc(p, end, enc);
5119+
}
5120+
else {
5121+
p++;
5122+
}
50865123
}
50875124
}
50885125
p = seps[i];
50895126
ALLOCV_END(sepsv);
50905127
break;
50915128
}
50925129
}
5130+
50935131
if (p == name) {
5094-
return rb_enc_str_new(".", 1, enc);
5132+
return rb_enc_str_new(".", 1, rb_str_enc_get(fname));
50955133
}
50965134
#ifdef DOSISH_DRIVE_LETTER
50975135
if (has_drive_letter(name) && isdirsep(*(name + 2))) {
50985136
const char *top = skiproot(name + 2, end, enc);
5099-
dirname = rb_enc_str_new(name, 3, enc);
5137+
dirname = rb_enc_str_new(name, 3, rb_str_enc_get(fname));
51005138
rb_str_cat(dirname, top, p - top);
51015139
}
51025140
else
51035141
#endif
5104-
dirname = rb_enc_str_new(name, p - name, enc);
5142+
dirname = rb_enc_str_new(name, p - name, rb_str_enc_get(fname));
51055143
#ifdef DOSISH_DRIVE_LETTER
51065144
if (has_drive_letter(name) && root == name + 2 && p - name == 2)
51075145
rb_str_cat(dirname, ".", 1);

0 commit comments

Comments
 (0)