@@ -226,7 +226,7 @@ def state
226226 end
227227
228228 # Tokens where state should be ignored
229- # used for :on_sp, :on_comment, :on_heredoc_end, :on_embexpr_end
229+ # used for :on_comment, :on_heredoc_end, :on_embexpr_end
230230 class IgnoreStateToken < Token
231231 def ==( other ) # :nodoc:
232232 self [ 0 ...-1 ] == other [ 0 ...-1 ]
@@ -611,10 +611,10 @@ def self.build(opening)
611611 BOM_FLUSHED = RUBY_VERSION >= "3.3.0"
612612 private_constant :BOM_FLUSHED
613613
614- attr_reader :options
614+ attr_reader :source , :options
615615
616- def initialize ( code , **options )
617- @code = code
616+ def initialize ( source , **options )
617+ @source = source
618618 @options = options
619619 end
620620
@@ -624,14 +624,12 @@ def result
624624 state = :default
625625 heredoc_stack = [ [ ] ] #: Array[Array[Heredoc::PlainHeredoc | Heredoc::DashHeredoc | Heredoc::DedentingHeredoc]]
626626
627- result = Prism . lex ( @code , **options )
628- source = result . source
627+ result = Prism . lex ( source , **options )
629628 result_value = result . value
630629 previous_state = nil #: State?
631630 last_heredoc_end = nil #: Integer?
632- eof_token = nil
633631
634- bom = source . slice ( 0 , 3 ) == "\xEF\xBB\xBF"
632+ bom = source . byteslice ( 0 .. 2 ) == "\xEF\xBB\xBF"
635633
636634 result_value . each_with_index do |( token , lex_state ) , index |
637635 lineno = token . location . start_line
@@ -743,7 +741,6 @@ def result
743741
744742 Token . new ( [ [ lineno , column ] , event , value , lex_state ] )
745743 when :on_eof
746- eof_token = token
747744 previous_token = result_value [ index - 1 ] [ 0 ]
748745
749746 # If we're at the end of the file and the previous token was a
@@ -766,7 +763,7 @@ def result
766763 end_offset += 3
767764 end
768765
769- tokens << Token . new ( [ [ lineno , 0 ] , :on_nl , source . slice ( start_offset , end_offset - start_offset ) , lex_state ] )
766+ tokens << Token . new ( [ [ lineno , 0 ] , :on_nl , source . byteslice ( start_offset ... end_offset ) , lex_state ] )
770767 end
771768 end
772769
@@ -860,89 +857,7 @@ def result
860857 # We sort by location to compare against Ripper's output
861858 tokens . sort_by! ( &:location )
862859
863- # Add :on_sp tokens
864- tokens = add_on_sp_tokens ( tokens , source , result . data_loc , bom , eof_token )
865-
866- Result . new ( tokens , result . comments , result . magic_comments , result . data_loc , result . errors , result . warnings , source )
867- end
868-
869- def add_on_sp_tokens ( tokens , source , data_loc , bom , eof_token )
870- new_tokens = [ ]
871-
872- prev_token_state = Translation ::Ripper ::Lexer ::State . cached ( Translation ::Ripper ::EXPR_BEG )
873- prev_token_end = bom ? 3 : 0
874-
875- tokens . each do |token |
876- line , column = token . location
877- start_offset = source . line_to_byte_offset ( line ) + column
878- # Ripper reports columns on line 1 without counting the BOM, so we adjust to get the real offset
879- start_offset += 3 if line == 1 && bom
880-
881- if start_offset > prev_token_end
882- sp_value = source . slice ( prev_token_end , start_offset - prev_token_end )
883- sp_line = source . line ( prev_token_end )
884- sp_column = source . column ( prev_token_end )
885- # Ripper reports columns on line 1 without counting the BOM
886- sp_column -= 3 if sp_line == 1 && bom
887- continuation_index = sp_value . byteindex ( "\\" )
888-
889- # ripper emits up to three :on_sp tokens when line continuations are used
890- if continuation_index
891- next_whitespace_index = continuation_index + 1
892- next_whitespace_index += 1 if sp_value . byteslice ( next_whitespace_index ) == "\r"
893- next_whitespace_index += 1
894- first_whitespace = sp_value [ 0 ...continuation_index ]
895- continuation = sp_value [ continuation_index ...next_whitespace_index ]
896- second_whitespace = sp_value [ next_whitespace_index ..]
897-
898- new_tokens << IgnoreStateToken . new ( [
899- [ sp_line , sp_column ] ,
900- :on_sp ,
901- first_whitespace ,
902- prev_token_state
903- ] ) unless first_whitespace . empty?
904-
905- new_tokens << IgnoreStateToken . new ( [
906- [ sp_line , sp_column + continuation_index ] ,
907- :on_sp ,
908- continuation ,
909- prev_token_state
910- ] )
911-
912- new_tokens << IgnoreStateToken . new ( [
913- [ sp_line + 1 , 0 ] ,
914- :on_sp ,
915- second_whitespace ,
916- prev_token_state
917- ] ) unless second_whitespace . empty?
918- else
919- new_tokens << IgnoreStateToken . new ( [
920- [ sp_line , sp_column ] ,
921- :on_sp ,
922- sp_value ,
923- prev_token_state
924- ] )
925- end
926- end
927-
928- new_tokens << token
929- prev_token_state = token . state
930- prev_token_end = start_offset + token . value . bytesize
931- end
932-
933- unless data_loc # no trailing :on_sp with __END__ as it is always preceded by :on_nl
934- end_offset = eof_token . location . end_offset
935- if prev_token_end < end_offset
936- new_tokens << IgnoreStateToken . new ( [
937- [ source . line ( prev_token_end ) , source . column ( prev_token_end ) ] ,
938- :on_sp ,
939- source . slice ( prev_token_end , end_offset - prev_token_end ) ,
940- prev_token_state
941- ] )
942- end
943- end
944-
945- new_tokens
860+ Result . new ( tokens , result . comments , result . magic_comments , result . data_loc , result . errors , result . warnings , Source . for ( source ) )
946861 end
947862 end
948863
0 commit comments