Skip to content

Commit 3b0f1f0

Browse files
committed
Исправлено зависание лексера на некоторых юникодных символах
1 parent 1dcabe6 commit 3b0f1f0

File tree

2 files changed

+41
-4
lines changed

2 files changed

+41
-4
lines changed

src/com/annimon/ownlang/parser/Lexer.java

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ public List<Token> tokenize() {
120120
while (pos < length) {
121121
final char current = peek(0);
122122
if (Character.isDigit(current)) tokenizeNumber();
123-
else if (Character.isJavaIdentifierStart(current)) tokenizeWord();
123+
else if (isOwnLangIdentifierStart(current)) tokenizeWord();
124124
else if (current == '`') tokenizeExtendedWord();
125125
else if (current == '"') tokenizeText();
126126
else if (current == '#') {
@@ -208,9 +208,10 @@ private void tokenizeOperator() {
208208

209209
private void tokenizeWord() {
210210
clearBuffer();
211-
char current = peek(0);
211+
buffer.append(peek(0));
212+
char current = next();
212213
while (true) {
213-
if (!Character.isLetterOrDigit(current) && (current != '_') && (current != '$')) {
214+
if (!isOwnLangIdentifierPart(current)) {
214215
break;
215216
}
216217
buffer.append(current);
@@ -224,7 +225,7 @@ private void tokenizeWord() {
224225
addToken(TokenType.WORD, word);
225226
}
226227
}
227-
228+
228229
private void tokenizeExtendedWord() {
229230
next();// skip `
230231
clearBuffer();
@@ -306,6 +307,14 @@ private void tokenizeMultilineComment() {
306307
next(); // *
307308
next(); // /
308309
}
310+
311+
/**
 * Reports whether {@code current} may begin an identifier: it must be a
 * letter (per {@link Character#isLetter(char)}), an underscore or a dollar sign.
 *
 * NOTE(review): the argument is a single {@code char}, so a letter encoded as
 * a surrogate pair would be tested one half at a time — confirm this is intended.
 *
 * @param current the character to test
 * @return {@code true} if the character is a letter, '_' or '$'
 */
private boolean isOwnLangIdentifierStart(char current) {
312+
return (Character.isLetter(current) || (current == '_') || (current == '$'));
313+
}
314+
315+
/**
 * Reports whether {@code current} may appear inside an identifier: it must be
 * a letter or digit (per {@link Character#isLetterOrDigit(char)}), an
 * underscore or a dollar sign.
 *
 * @param current the character to test
 * @return {@code true} if the character is a letter, a digit, '_' or '$'
 */
private boolean isOwnLangIdentifierPart(char current) {
316+
return (Character.isLetterOrDigit(current) || (current == '_') || (current == '$'));
317+
}
309318

310319
private void clearBuffer() {
311320
buffer.setLength(0);

test/com/annimon/ownlang/parser/LexerTest.java

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,34 @@ public void testCommentsError() {
122122
String input = "/* 1234 \n";
123123
Lexer.tokenize(input);
124124
}
125+
126+
// The input opens an extended word with a backtick and never closes it;
// tokenizing it is expected to throw LexerException.
@Test(expected = LexerException.class)
127+
public void testExtendedWordError() {
128+
String input = "` 1234";
129+
Lexer.tokenize(input);
130+
}
131+
132+
// Regression test for the lexer hang: '€' satisfies
// Character.isJavaIdentifierStart but not Character.isLetterOrDigit, so the
// previous start/part checks disagreed about it. The lexer is now expected to
// produce no token for '€' and to emit only EQ and NUMBER for the rest.
@Test
133+
public void testUnicodeCharacterIdentifier() {
134+
String input = "€ = 1";
135+
List<Token> expList = list(EQ, NUMBER);
136+
List<Token> result = Lexer.tokenize(input);
137+
assertTokens(expList, result);
138+
}
139+
140+
// The same '€' character wrapped in backticks is expected to yield a WORD
// token, followed by EQ and NUMBER.
@Test
141+
public void testUnicodeCharacterExtendedWordIdentifier() {
142+
String input = "`€` = 1";
143+
List<Token> expList = list(WORD, EQ, NUMBER);
144+
List<Token> result = Lexer.tokenize(input);
145+
assertTokens(expList, result);
146+
}
147+
148+
// Input consisting solely of '€': tokenizing must finish and return an
// empty token list.
@Test
149+
public void testUnicodeCharacterEOF() {
150+
String input = "€";
151+
assertTrue(Lexer.tokenize(input).isEmpty());
152+
}
125153

126154
private static void assertTokens(List<Token> expList, List<Token> result) {
127155
final int length = expList.size();

0 commit comments

Comments
 (0)