Skip to content

Commit 99a6366

Browse files
committed
Refactor the lexer to avoid altering the input
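By maintaining an offset into the input while lexing, rather than removing the matched part from the input after every token, performance is better, as PHP no longer needs to copy large substrings. Because the input is no longer truncated, each rule is now anchored with `\G` (match exactly at the starting offset passed to `preg_match`) instead of `^`, which would only match at the very beginning of the full input string.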
1 parent 6fa9789 commit 99a6366

1 file changed

Lines changed: 24 additions & 24 deletions

File tree

src/Seld/JsonLint/Lexer.php

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -20,20 +20,20 @@ class Lexer
2020
{
2121
private $EOF = 1;
2222
private $rules = array(
23-
0 => '/^\s+/',
24-
1 => '/^-?([0-9]|[1-9][0-9]+)(\.[0-9]+)?([eE][+-]?[0-9]+)?\b/',
25-
2 => '{^"(?>\\\\["bfnrt/\\\\]|\\\\u[a-fA-F0-9]{4}|[^\0-\x1f\\\\"]++)*+"}',
26-
3 => '/^\{/',
27-
4 => '/^\}/',
28-
5 => '/^\[/',
29-
6 => '/^\]/',
30-
7 => '/^,/',
31-
8 => '/^:/',
32-
9 => '/^true\b/',
33-
10 => '/^false\b/',
34-
11 => '/^null\b/',
35-
12 => '/^$/',
36-
13 => '/^./',
23+
0 => '/\G\s+/',
24+
1 => '/\G-?([0-9]|[1-9][0-9]+)(\.[0-9]+)?([eE][+-]?[0-9]+)?\b/',
25+
2 => '{\G"(?>\\\\["bfnrt/\\\\]|\\\\u[a-fA-F0-9]{4}|[^\0-\x1f\\\\"]++)*+"}',
26+
3 => '/\G\{/',
27+
4 => '/\G\}/',
28+
5 => '/\G\[/',
29+
6 => '/\G\]/',
30+
7 => '/\G,/',
31+
8 => '/\G:/',
32+
9 => '/\Gtrue\b/',
33+
10 => '/\Gfalse\b/',
34+
11 => '/\Gnull\b/',
35+
12 => '/\G$/',
36+
13 => '/\G./',
3737
);
3838

3939
private $conditions = array(
@@ -47,7 +47,7 @@ class Lexer
4747
private $input;
4848
private $more;
4949
private $done;
50-
private $matched;
50+
private $offset;
5151

5252
public $match;
5353
public $yylineno;
@@ -70,8 +70,9 @@ public function setInput($input)
7070
$this->input = $input;
7171
$this->more = false;
7272
$this->done = false;
73+
$this->offset = 0;
7374
$this->yylineno = $this->yyleng = 0;
74-
$this->yytext = $this->matched = $this->match = '';
75+
$this->yytext = $this->match = '';
7576
$this->conditionStack = array('INITIAL');
7677
$this->yylloc = array('first_line' => 1, 'first_column' => 0, 'last_line' => 1, 'last_column' => 0);
7778

@@ -88,16 +89,16 @@ public function showPosition()
8889

8990
public function getPastInput()
9091
{
91-
$past = substr($this->matched, 0, strlen($this->matched) - strlen($this->match));
92+
$pastLength = $this->offset - strlen($this->match);
9293

93-
return (strlen($past) > 20 ? '...' : '') . substr($past, -20);
94+
return ($pastLength > 20 ? '...' : '') . substr($this->input, max(0, $pastLength - 20), min(20, $pastLength));
9495
}
9596

9697
public function getUpcomingInput()
9798
{
9899
$next = $this->match;
99100
if (strlen($next) < 20) {
100-
$next .= substr($this->input, 0, 20 - strlen($next));
101+
$next .= substr($this->input, $this->offset, 20 - strlen($next));
101102
}
102103

103104
return substr($next, 0, 20) . (strlen($next) > 20 ? '...' : '');
@@ -113,7 +114,7 @@ private function next()
113114
if ($this->done) {
114115
return $this->EOF;
115116
}
116-
if ($this->input === '') {
117+
if ($this->offset === \strlen($this->input)) {
117118
$this->done = true;
118119
}
119120

@@ -131,7 +132,7 @@ private function next()
131132
$rulesLen = count($rules);
132133

133134
for ($i=0; $i < $rulesLen; $i++) {
134-
if (preg_match($this->rules[$rules[$i]], $this->input, $match)) {
135+
if (preg_match($this->rules[$rules[$i]], $this->input, $match, 0, $this->offset)) {
135136
preg_match_all('/\n.*/', $match[0], $lines);
136137
$lines = $lines[0];
137138
if ($lines) {
@@ -148,8 +149,7 @@ private function next()
148149
$this->match .= $match[0];
149150
$this->yyleng = strlen($this->yytext);
150151
$this->more = false;
151-
$this->input = substr($this->input, strlen($match[0]));
152-
$this->matched .= $match[0];
152+
$this->offset += \strlen($match[0]);
153153
$token = $this->performAction($rules[$i], $this->conditionStack[count($this->conditionStack)-1]);
154154
if ($token) {
155155
return $token;
@@ -159,7 +159,7 @@ private function next()
159159
}
160160
}
161161

162-
if ($this->input === "") {
162+
if ($this->offset === \strlen($this->input)) {
163163
return $this->EOF;
164164
}
165165

0 commit comments

Comments
 (0)