Skip to content

Commit 2b20215

Browse files
committed
Updated TOKENIZER for the parser.
1 parent 7698fb4 commit 2b20215

File tree

2 files changed

+243
-0
lines changed

2 files changed

+243
-0
lines changed

src/Essentials/Tokenizer.cpp

Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
#include "Tokenizer.h"
2+
3+
namespace FPL::Tokenizer {
4+
std::vector<Token> TokenBuilder::ParseToken(const std::string &contentFile) {
5+
std::vector<Token> AllTokens;
6+
Token currentToken;
7+
8+
currentToken.TokenLineNumber = 1;
9+
10+
for (auto const &element: contentFile) {
11+
if (currentToken.TokenType == STRING_ESCAPE_SEQUENCE) {
12+
switch (element) {
13+
case 'n':
14+
currentToken.TokenText.append(1, '\n');
15+
break;
16+
case 'r':
17+
currentToken.TokenText.append(1, '\r');
18+
break;
19+
case 't':
20+
currentToken.TokenText.append(1, '\t');
21+
break;
22+
case '\\':
23+
currentToken.TokenText.append(1, '\\');
24+
break;
25+
default:
26+
throw std::runtime_error(std::string("unknown escape sequence: \\") + std::string(1, element) +
27+
" in string on line " + currentToken.TokenText + ".");
28+
}
29+
currentToken.TokenType = CHAINE_LITTERAL;
30+
continue;
31+
} else if (currentToken.TokenType == PEUTETRE_COMMENT && element != '/') {
32+
currentToken.TokenType = OPERATEUR;
33+
TokenBuilder::ParseEndToken(currentToken, AllTokens);
34+
continue;
35+
}
36+
37+
switch (element) {
38+
case '0':
39+
case '1':
40+
case '2':
41+
case '3':
42+
case '4':
43+
case '5':
44+
case '6':
45+
case '7':
46+
case '8':
47+
case '9':
48+
if (currentToken.TokenType == ESPACE_VIDE) {
49+
currentToken.TokenType = ENTIER;
50+
currentToken.TokenText.append(1, element);
51+
} else if (currentToken.TokenType == PEUTETRE_DECIMAL) {
52+
currentToken.TokenType = DECIMAL;
53+
currentToken.TokenText.append(1, element);
54+
} else if (currentToken.TokenType == IDENTIFIANT) {
55+
TokenBuilder::ParseEndToken(currentToken, AllTokens);
56+
currentToken.TokenType = ENTIER;
57+
currentToken.TokenText.append(1, element);
58+
} else {
59+
currentToken.TokenText.append(1, element);
60+
}
61+
break;
62+
63+
case '.':
64+
if (currentToken.TokenType == ESPACE_VIDE) {
65+
currentToken.TokenType = PEUTETRE_DECIMAL;
66+
currentToken.TokenText.append(1, element);
67+
} else if (currentToken.TokenType == ENTIER) {
68+
currentToken.TokenType = DECIMAL;
69+
currentToken.TokenText.append(1, element);
70+
} else if (currentToken.TokenType == CHAINE_LITTERAL) {
71+
currentToken.TokenText.append(1, element);
72+
} else {
73+
TokenBuilder::ParseEndToken(currentToken, AllTokens);
74+
currentToken.TokenType = OPERATEUR;
75+
currentToken.TokenText.append(1, element);
76+
TokenBuilder::ParseEndToken(currentToken, AllTokens);
77+
}
78+
break;
79+
80+
case '{':
81+
case '}':
82+
case '(':
83+
case ')':
84+
case ',':
85+
case ';':
86+
case '-':
87+
case '+':
88+
case '*':
89+
case '=':
90+
case ':':
91+
case '<':
92+
case '>':
93+
case '[':
94+
case ']':
95+
if (currentToken.TokenType != CHAINE_LITTERAL) {
96+
TokenBuilder::ParseEndToken(currentToken, AllTokens);
97+
currentToken.TokenType = OPERATEUR;
98+
currentToken.TokenText.append(1, element);
99+
TokenBuilder::ParseEndToken(currentToken, AllTokens);
100+
} else {
101+
currentToken.TokenText.append(1, element);
102+
}
103+
break;
104+
105+
case '"':
106+
if (currentToken.TokenType != CHAINE_LITTERAL) {
107+
TokenBuilder::ParseEndToken(currentToken, AllTokens);
108+
currentToken.TokenType = CHAINE_LITTERAL;
109+
} else if (currentToken.TokenType == CHAINE_LITTERAL) {
110+
TokenBuilder::ParseEndToken(currentToken, AllTokens);
111+
}
112+
break;
113+
114+
case ' ':
115+
case '\t':
116+
if (currentToken.TokenType == CHAINE_LITTERAL || currentToken.TokenType == COMMENT) {
117+
currentToken.TokenText.append(1, element);
118+
} else {
119+
TokenBuilder::ParseEndToken(currentToken, AllTokens);
120+
}
121+
break;
122+
123+
case '\r':
124+
case '\n':
125+
TokenBuilder::ParseEndToken(currentToken, AllTokens);
126+
++currentToken.TokenLineNumber;
127+
break;
128+
129+
case '\\':
130+
if (currentToken.TokenType == CHAINE_LITTERAL) {
131+
currentToken.TokenType = STRING_ESCAPE_SEQUENCE;
132+
} else {
133+
TokenBuilder::ParseEndToken(currentToken, AllTokens);
134+
currentToken.TokenType = OPERATEUR;
135+
currentToken.TokenText.append(1, element);
136+
TokenBuilder::ParseEndToken(currentToken, AllTokens);
137+
}
138+
break;
139+
140+
case '/':
141+
if (currentToken.TokenType == CHAINE_LITTERAL) {
142+
currentToken.TokenText.append(1, element);
143+
} else if (currentToken.TokenType == PEUTETRE_COMMENT) {
144+
currentToken.TokenType = COMMENT;
145+
currentToken.TokenText.erase();
146+
} else {
147+
TokenBuilder::ParseEndToken(currentToken, AllTokens);
148+
currentToken.TokenType = PEUTETRE_COMMENT;
149+
currentToken.TokenText.append(1, element);
150+
}
151+
break;
152+
153+
default:
154+
if (currentToken.TokenType == ESPACE_VIDE || currentToken.TokenType == ENTIER || currentToken.TokenType == DECIMAL) {
155+
TokenBuilder::ParseEndToken(currentToken, AllTokens);
156+
currentToken.TokenType = IDENTIFIANT;
157+
currentToken.TokenText.append(1, element);
158+
} else {
159+
currentToken.TokenText.append(1, element);
160+
}
161+
break;
162+
}
163+
}
164+
return AllTokens;
165+
}
166+
167+
/**
 * @brief Finishes the token under construction and resets it for the next one.
 *
 * Comments and the empty ESPACE_VIDE state are dropped; every other token is
 * appended to @p tokens. Afterwards @p token is put back into the ESPACE_VIDE
 * state with empty text. Its line number is left untouched.
 *
 * @param token  Token being built; reset on return.
 * @param tokens Output list that receives the finished token, if any.
 */
void TokenBuilder::ParseEndToken(Token &token, std::vector<Token> &tokens) {
    // A lone "." that never became a decimal number is the dot operator.
    // This must be decided BEFORE the token is stored: doing it afterwards
    // has no effect because the type is reset to ESPACE_VIDE just below.
    if (token.TokenType == PEUTETRE_DECIMAL && token.TokenText == ".") {
        token.TokenType = OPERATEUR;
    }

    // Comments are ignored, and ESPACE_VIDE means there is nothing to emit.
    if (token.TokenType != COMMENT && token.TokenType != ESPACE_VIDE) {
        tokens.push_back(token);
    }

    token.TokenType = ESPACE_VIDE;
    token.TokenText.erase();
}
184+
185+
std::ostream &operator<<(std::ostream &flux, const Token &token) {
186+
flux << "(Token Flux: " << token.TokenText << ", " << FPL::Tokenizer::sAllTokenTypes[token.TokenType] << ")";
187+
return flux;
188+
}
189+
}

src/Essentials/Tokenizer.h

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
#pragma once
2+
3+
#include <iostream>
4+
#include <string>
5+
#include <vector>
6+
#include <ostream>
7+
#include <cctype>
8+
#include <algorithm>
9+
10+
namespace FPL::Tokenizer {

    /**
     * @brief Category of a token.
     *
     * While scanning, the type of the token under construction is also used
     * as the scanner's current state, which is why some values describe
     * intermediate situations rather than final token kinds.
     */
    enum TokenTypes {
        ESPACE_VIDE,            ///< No token in progress (default state of a Token).
        IDENTIFIANT,            ///< Identifier.
        ENTIER,                 ///< Integer literal.
        DECIMAL,                ///< Decimal literal.
        PEUTETRE_DECIMAL,       ///< A '.' was read that may turn out to start a decimal literal.
        CHAINE_LITTERAL,        ///< String literal delimited by double quotes.
        OPERATEUR,              ///< Single-character operator or punctuation.
        STRING_ESCAPE_SEQUENCE, ///< A backslash was just read inside a string literal.
        PEUTETRE_COMMENT,       ///< A single '/' was read that may turn out to start a comment.
        COMMENT                 ///< Line comment introduced by two consecutive '/'.
    };

    // Printable name of each TokenTypes value, indexed by the enumerator.
    // `inline` guarantees a single definition across translation units and
    // `constexpr` makes the table available at compile time.
    inline constexpr const char *sAllTokenTypes[] = {
            "ESPACE_VIDE",
            "IDENTIFIANT",
            "ENTIER",
            "DECIMAL",
            "PEUTETRE_DECIMAL",
            "CHAINE_LITTERAL",
            "OPERATEUR",
            "STRING_ESCAPE_SEQUENCE",
            "PEUTETRE_COMMENT",
            "COMMENT"
    };

    // The table is indexed with a TokenTypes value, so it needs exactly one
    // entry per enumerator; fail the build if the two ever get out of sync.
    static_assert(sizeof(sAllTokenTypes) / sizeof(sAllTokenTypes[0]) == static_cast<size_t>(COMMENT) + 1,
                  "sAllTokenTypes must have one entry per TokenTypes enumerator");

    /**
     * @brief A token: its category, its text and the line it was found on.
     */
    class Token {
    public:
        enum TokenTypes TokenType = ESPACE_VIDE; ///< Category of the token.
        std::string TokenText;                   ///< Text of the token.
        size_t TokenLineNumber {0};              ///< Line number associated with the token.

        /// Writes the token's text and type name to @p flux.
        friend std::ostream& operator<<(std::ostream& flux, Token const& token);
    };

    /**
     * @brief Stateless helper that turns source text into tokens.
     */
    class TokenBuilder {
    public:
        /**
         * @brief Tokenizes a whole source text.
         * @param contentFile Source text to scan.
         * @return The tokens found, in order of appearance.
         * @throws std::runtime_error on an unknown escape sequence in a string literal.
         */
        static std::vector<Token> ParseToken(const std::string& contentFile);

        /**
         * @brief Stores the token under construction (unless it is a comment
         *        or empty) and resets it to the ESPACE_VIDE state.
         * @param token  Token being built; reset on return.
         * @param tokens List that receives the finished token.
         */
        static void ParseEndToken(Token& token, std::vector<Token>& tokens);
    };
}

0 commit comments

Comments
 (0)