Skip to content

Commit 58f1127

Browse files
committed
Revert "[ruby/prism] Add Ripper :on_sp events for Prism.lex_compat and Prism::Translation::Ripper"
This reverts commit 35a7b51, which broke syntax_suggest. See the failing CI run: https://github.com/ruby/ruby/actions/runs/21167011751/job/60874111912
1 parent 35a7b51 commit 58f1127

6 files changed

Lines changed: 19 additions & 106 deletions

File tree

lib/prism.rb

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,8 @@ def initialize(version)
6161
# Prism::lex_compat(source, **options) -> LexCompat::Result
6262
#
6363
# Returns a parse result whose value is an array of tokens that closely
64-
# resembles the return value of Ripper::lex.
64+
# resembles the return value of Ripper::lex. The main difference is that the
65+
# `:on_sp` token is not emitted.
6566
#
6667
# For supported options, see Prism::parse.
6768
def self.lex_compat(source, **options)
@@ -71,8 +72,9 @@ def self.lex_compat(source, **options)
7172
# :call-seq:
7273
# Prism::lex_ripper(source) -> Array
7374
#
74-
# This wraps the result of Ripper.lex. It produces almost exactly the
75-
# same tokens. Raises SyntaxError if the syntax in source is invalid.
75+
# This lexes with the Ripper lex. It drops any space events but otherwise
76+
# returns the same tokens. Raises SyntaxError if the syntax in source is
77+
# invalid.
7678
def self.lex_ripper(source)
7779
LexRipper.new(source).result # steep:ignore
7880
end

lib/prism/lex_compat.rb

Lines changed: 8 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@ def state
226226
end
227227

228228
# Tokens where state should be ignored
229-
# used for :on_sp, :on_comment, :on_heredoc_end, :on_embexpr_end
229+
# used for :on_comment, :on_heredoc_end, :on_embexpr_end
230230
class IgnoreStateToken < Token
231231
def ==(other) # :nodoc:
232232
self[0...-1] == other[0...-1]
@@ -611,10 +611,10 @@ def self.build(opening)
611611
BOM_FLUSHED = RUBY_VERSION >= "3.3.0"
612612
private_constant :BOM_FLUSHED
613613

614-
attr_reader :options
614+
attr_reader :source, :options
615615

616-
def initialize(code, **options)
617-
@code = code
616+
def initialize(source, **options)
617+
@source = source
618618
@options = options
619619
end
620620

@@ -624,14 +624,12 @@ def result
624624
state = :default
625625
heredoc_stack = [[]] #: Array[Array[Heredoc::PlainHeredoc | Heredoc::DashHeredoc | Heredoc::DedentingHeredoc]]
626626

627-
result = Prism.lex(@code, **options)
628-
source = result.source
627+
result = Prism.lex(source, **options)
629628
result_value = result.value
630629
previous_state = nil #: State?
631630
last_heredoc_end = nil #: Integer?
632-
eof_token = nil
633631

634-
bom = source.slice(0, 3) == "\xEF\xBB\xBF"
632+
bom = source.byteslice(0..2) == "\xEF\xBB\xBF"
635633

636634
result_value.each_with_index do |(token, lex_state), index|
637635
lineno = token.location.start_line
@@ -743,7 +741,6 @@ def result
743741

744742
Token.new([[lineno, column], event, value, lex_state])
745743
when :on_eof
746-
eof_token = token
747744
previous_token = result_value[index - 1][0]
748745

749746
# If we're at the end of the file and the previous token was a
@@ -766,7 +763,7 @@ def result
766763
end_offset += 3
767764
end
768765

769-
tokens << Token.new([[lineno, 0], :on_nl, source.slice(start_offset, end_offset - start_offset), lex_state])
766+
tokens << Token.new([[lineno, 0], :on_nl, source.byteslice(start_offset...end_offset), lex_state])
770767
end
771768
end
772769

@@ -860,89 +857,7 @@ def result
860857
# We sort by location to compare against Ripper's output
861858
tokens.sort_by!(&:location)
862859

863-
# Add :on_sp tokens
864-
tokens = add_on_sp_tokens(tokens, source, result.data_loc, bom, eof_token)
865-
866-
Result.new(tokens, result.comments, result.magic_comments, result.data_loc, result.errors, result.warnings, source)
867-
end
868-
869-
def add_on_sp_tokens(tokens, source, data_loc, bom, eof_token)
870-
new_tokens = []
871-
872-
prev_token_state = Translation::Ripper::Lexer::State.cached(Translation::Ripper::EXPR_BEG)
873-
prev_token_end = bom ? 3 : 0
874-
875-
tokens.each do |token|
876-
line, column = token.location
877-
start_offset = source.line_to_byte_offset(line) + column
878-
# Ripper reports columns on line 1 without counting the BOM, so we adjust to get the real offset
879-
start_offset += 3 if line == 1 && bom
880-
881-
if start_offset > prev_token_end
882-
sp_value = source.slice(prev_token_end, start_offset - prev_token_end)
883-
sp_line = source.line(prev_token_end)
884-
sp_column = source.column(prev_token_end)
885-
# Ripper reports columns on line 1 without counting the BOM
886-
sp_column -= 3 if sp_line == 1 && bom
887-
continuation_index = sp_value.byteindex("\\")
888-
889-
# ripper emits up to three :on_sp tokens when line continuations are used
890-
if continuation_index
891-
next_whitespace_index = continuation_index + 1
892-
next_whitespace_index += 1 if sp_value.byteslice(next_whitespace_index) == "\r"
893-
next_whitespace_index += 1
894-
first_whitespace = sp_value[0...continuation_index]
895-
continuation = sp_value[continuation_index...next_whitespace_index]
896-
second_whitespace = sp_value[next_whitespace_index..]
897-
898-
new_tokens << IgnoreStateToken.new([
899-
[sp_line, sp_column],
900-
:on_sp,
901-
first_whitespace,
902-
prev_token_state
903-
]) unless first_whitespace.empty?
904-
905-
new_tokens << IgnoreStateToken.new([
906-
[sp_line, sp_column + continuation_index],
907-
:on_sp,
908-
continuation,
909-
prev_token_state
910-
])
911-
912-
new_tokens << IgnoreStateToken.new([
913-
[sp_line + 1, 0],
914-
:on_sp,
915-
second_whitespace,
916-
prev_token_state
917-
]) unless second_whitespace.empty?
918-
else
919-
new_tokens << IgnoreStateToken.new([
920-
[sp_line, sp_column],
921-
:on_sp,
922-
sp_value,
923-
prev_token_state
924-
])
925-
end
926-
end
927-
928-
new_tokens << token
929-
prev_token_state = token.state
930-
prev_token_end = start_offset + token.value.bytesize
931-
end
932-
933-
unless data_loc # no trailing :on_sp with __END__ as it is always preceded by :on_nl
934-
end_offset = eof_token.location.end_offset
935-
if prev_token_end < end_offset
936-
new_tokens << IgnoreStateToken.new([
937-
[source.line(prev_token_end), source.column(prev_token_end)],
938-
:on_sp,
939-
source.slice(prev_token_end, end_offset - prev_token_end),
940-
prev_token_state
941-
])
942-
end
943-
end
944-
945-
new_tokens
860+
Result.new(tokens, result.comments, result.magic_comments, result.data_loc, result.errors, result.warnings, Source.for(source))
946861
end
947862
end
948863

lib/prism/lex_ripper.rb

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ def result
1919

2020
lex(source).each do |token|
2121
case token[1]
22+
when :on_sp
23+
# skip
2224
when :on_tstring_content
2325
if previous[1] == :on_tstring_content && (token[2].start_with?("\#$") || token[2].start_with?("\#@"))
2426
previous[2] << token[2]

test/prism/fixtures/bom_leading_space.txt

Lines changed: 0 additions & 1 deletion
This file was deleted.

test/prism/fixtures/bom_spaces.txt

Lines changed: 0 additions & 1 deletion
This file was deleted.

test/prism/ruby/ripper_test.rb

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,6 @@ class RipperTest < TestCase
3939

4040
# Skip these tests that we haven't implemented yet.
4141
omitted_sexp_raw = [
42-
"bom_leading_space.txt",
43-
"bom_spaces.txt",
4442
"dos_endings.txt",
4543
"heredocs_with_fake_newlines.txt",
4644
"heredocs_with_ignored_newlines.txt",
@@ -94,7 +92,7 @@ def test_lexer
9492
assert_equal(expected, lexer.parse[0].to_a)
9593
assert_equal(lexer.parse[0].to_a, lexer.scan[0].to_a)
9694

97-
assert_equal(%i[on_int on_sp on_op], Translation::Ripper::Lexer.new("1 +").lex.map(&:event))
95+
assert_equal(%i[on_int on_op], Translation::Ripper::Lexer.new("1 +").lex.map(&:event))
9896
assert_raise(SyntaxError) { Translation::Ripper::Lexer.new("1 +").lex(raise_errors: true) }
9997
end
10098

@@ -123,17 +121,15 @@ def assert_ripper_sexp_raw(source)
123121
def assert_ripper_lex(source)
124122
prism = Translation::Ripper.lex(source)
125123
ripper = Ripper.lex(source)
126-
127-
# Prism emits tokens by their order in the code, not in parse order
128-
ripper.sort_by! { |elem| elem[0] }
124+
ripper.reject! { |elem| elem[1] == :on_sp } # Prism doesn't emit on_sp
125+
ripper.sort_by! { |elem| elem[0] } # Prism emits tokens by their order in the code, not in parse order
129126

130127
[prism.size, ripper.size].max.times do |i|
131128
expected = ripper[i]
132129
actual = prism[i]
133-
134130
# Since tokens related to heredocs are not emitted in the same order,
135131
# the state also doesn't line up.
136-
if expected && actual && expected[1] == :on_heredoc_end && actual[1] == :on_heredoc_end
132+
if expected[1] == :on_heredoc_end && actual[1] == :on_heredoc_end
137133
expected[3] = actual[3] = nil
138134
end
139135

0 commit comments

Comments
 (0)