Skip to content

Commit 21c8b2a

Browse files
committed
added way to detect comments during parsing
1 parent 28e102a commit 21c8b2a

File tree

8 files changed

+267
-75
lines changed

8 files changed

+267
-75
lines changed

src/main/java/com/igormaznitsa/prologparser/GenericPrologParser.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@
2727
* Generic version of prolog parser.
2828
*/
2929
public class GenericPrologParser extends PrologParser {
30-
public GenericPrologParser(final Reader reader, final ParserContext context) {
31-
super(reader, context);
30+
public GenericPrologParser(final Reader reader, final ParserContext context,
31+
final TokenizedCommentListener... tokenizedCommentListeners) {
32+
super(reader, context, tokenizedCommentListeners);
3233
}
3334
}

src/main/java/com/igormaznitsa/prologparser/ParserContext.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,12 @@ public interface ParserContext {
6868
* @since 2.0.2
6969
*/
7070
int FLAG_ZERO_QUOTATION_ALLOWS_WHITESPACE_CHAR = 64;
71+
/**
72+
* Tokenize comments as atoms instead of skipping them.
73+
*
74+
* @since 2.2.0
75+
*/
76+
int FLAG_COMMENTS_AS_ATOMS = 128;
7177

7278
/**
7379
* Check that the context contains an operator starts with some string.

src/main/java/com/igormaznitsa/prologparser/PrologParser.java

Lines changed: 44 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
package com.igormaznitsa.prologparser;
2323

24+
import static com.igormaznitsa.prologparser.ParserContext.FLAG_COMMENTS_AS_ATOMS;
2425
import static com.igormaznitsa.prologparser.ParserContext.FLAG_DOT2_AS_LIST;
2526
import static com.igormaznitsa.prologparser.ParserContext.FLAG_NONE;
2627
import static com.igormaznitsa.prologparser.ParserContext.FLAG_VAR_AS_FUNCTOR;
@@ -102,13 +103,25 @@ public abstract class PrologParser implements Iterable<PrologTerm>, AutoCloseabl
102103

103104
protected final ParserContext context;
104105
protected final int parserFlags;
106+
private final boolean commentsAsAtoms;
105107
private final Tokenizer tokenizer;
106108
private boolean autoCloseReaderFlag;
109+
private final List<TokenizedCommentListener> commentTokenListeners;
107110

108-
protected PrologParser(final Reader source, final ParserContext context) {
111+
protected PrologParser(
112+
final Reader source,
113+
final ParserContext context,
114+
final TokenizedCommentListener... tokenizedCommentListeners
115+
) {
109116
this.context = context == null ? DefaultParserContext.of(ParserContext.FLAG_NONE) : context;
110117
this.parserFlags = context == null ? FLAG_NONE : context.getFlags();
118+
this.commentsAsAtoms = (this.parserFlags & FLAG_COMMENTS_AS_ATOMS) != 0;
111119
this.tokenizer = new Tokenizer(this, META_OP_MAP, requireNonNull(source));
120+
if (tokenizedCommentListeners.length == 0) {
121+
this.commentTokenListeners = List.of();
122+
} else {
123+
this.commentTokenListeners = List.of(tokenizedCommentListeners);
124+
}
112125
}
113126

114127
public static Op findBaseMetaOperator(final String text, final OpAssoc type) {
@@ -178,14 +191,36 @@ public boolean hasNext() {
178191
return this.tokenizer.peek() != null;
179192
}
180193

194+
private TokenizerResult readNextTokenCommentAware() {
195+
TokenizerResult result;
196+
if (this.commentsAsAtoms) {
197+
while (true) {
198+
result = this.tokenizer.readNextToken();
199+
if (result != null
200+
&& (result.getResult().getQuotation() == Quotation.COMMENT_BLOCK ||
201+
result.getResult().getQuotation() == Quotation.COMMENT_LINE)) {
202+
for (final TokenizedCommentListener listener : this.commentTokenListeners) {
203+
listener.onCommentToken(this, result);
204+
}
205+
} else {
206+
break;
207+
}
208+
}
209+
} else {
210+
result = this.tokenizer.readNextToken();
211+
}
212+
return result;
213+
}
214+
181215
public PrologTerm next() {
182216
final PrologTerm found = readBlock(OPERATORS_PHRASE);
183217
if (found == null) {
184218
throw new NoSuchElementException("No terms in source");
185219
} else {
186-
final TokenizerResult endAtom = this.tokenizer.readNextToken();
220+
final TokenizerResult endAtom = this.readNextTokenCommentAware();
187221
if (endAtom == null || !endAtom.getResult().getText().equals(OPERATOR_DOT.getText())) {
188-
throw new PrologParserException("End operator is not found", this.tokenizer.getLine(),
222+
throw new PrologParserException("End operator is not found",
223+
this.tokenizer.getLine(),
189224
this.tokenizer.getPos());
190225
}
191226
}
@@ -203,7 +238,7 @@ private PrologStruct readStruct(final PrologTerm functor) {
203238
return null;
204239
}
205240

206-
final TokenizerResult nextAtom = this.tokenizer.readNextToken();
241+
final TokenizerResult nextAtom = this.readNextTokenCommentAware();
207242
if (nextAtom == null) {
208243
throw new PrologParserException("Can't read next token in block", this.tokenizer.getLine(),
209244
this.tokenizer.getPos());
@@ -242,7 +277,7 @@ private PrologTerm readList(final TokenizerResult openingBracket) {
242277
while (continueReading) {
243278
final PrologTerm block = readBlock(OPERATORS_INSIDE_LIST);
244279

245-
final TokenizerResult nextAtom = this.tokenizer.readNextToken();
280+
final TokenizerResult nextAtom = this.readNextTokenCommentAware();
246281
if (nextAtom == null) {
247282
throw new PrologParserException("Can't read next token in list", this.tokenizer.getLine(),
248283
this.tokenizer.getPos());
@@ -281,7 +316,7 @@ private PrologTerm readList(final TokenizerResult openingBracket) {
281316
tokenizer.getLastTokenPos(), null);
282317
}
283318

284-
final TokenizerResult nextAtomTwo = tokenizer.readNextToken();
319+
final TokenizerResult nextAtomTwo = this.readNextTokenCommentAware();
285320
if (nextAtomTwo == null) {
286321
throw new PrologParserException("Can't find expected token in list",
287322
this.tokenizer.getLine(), this.tokenizer.getPos());
@@ -353,7 +388,7 @@ private PrologTerm readBlock(final Koi7CharOpMap endOperators) {
353388

354389
while (true) {
355390
// read next atom from tokenizer
356-
TokenizerResult readAtomContainer = this.tokenizer.readNextToken();
391+
TokenizerResult readAtomContainer = this.readNextTokenCommentAware();
357392

358393
if (readAtomContainer == null) {
359394
if (currentTreeItem == null) {
@@ -470,7 +505,7 @@ private PrologTerm readBlock(final Koi7CharOpMap endOperators) {
470505
readAtomContainer.getLine(), readAtomContainer.getPos());
471506
}
472507

473-
final TokenizerResult token = this.tokenizer.readNextToken();
508+
final TokenizerResult token = this.readNextTokenCommentAware();
474509

475510
final PrologTerm closingAtom;
476511
if (token == null) {
@@ -499,7 +534,7 @@ private PrologTerm readBlock(final Koi7CharOpMap endOperators) {
499534
}
500535
} else {
501536
if (readAtom.getType() != TermType.VAR || (this.parserFlags & FLAG_VAR_AS_FUNCTOR) != 0) {
502-
TokenizerResult nextToken = this.tokenizer.readNextToken();
537+
TokenizerResult nextToken = this.readNextTokenCommentAware();
503538

504539
if (nextToken == null) {
505540
throw new PrologParserException("Non-closed clause", this.tokenizer.getLastTokenLine(),
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
package com.igormaznitsa.prologparser;
2+
3+
import com.igormaznitsa.prologparser.tokenizer.TokenizerResult;
4+
5+
/**
6+
* Listener notified by the parser about each comment it encounters, if the comment detection flag is on.
7+
*
8+
* @see ParserContext#FLAG_COMMENTS_AS_ATOMS
9+
* @since 2.2.0
10+
*/
11+
@FunctionalInterface
12+
public interface TokenizedCommentListener {
13+
/**
14+
* Notification from the parser that either a line comment or a block comment has been received as a tokenizer result.
15+
*
16+
* @param parser source prolog parser, must not be null
17+
* @param comment detected comment token as an atom, must not be null
18+
*/
19+
void onCommentToken(PrologParser parser, TokenizerResult comment);
20+
}

src/main/java/com/igormaznitsa/prologparser/terms/Quotation.java

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,19 @@ public enum Quotation {
4747
* Term is back tick quotation
4848
* example: `hello`
4949
*/
50-
BACK_TICK("`");
50+
BACK_TICK("`"),
51+
/**
52+
* Special variant showing that the content is a line comment.
53+
*
54+
* @since 2.2.0
55+
*/
56+
COMMENT_LINE("%"),
57+
/**
58+
* Special variant showing that the content is a block comment.
59+
*
60+
* @since 2.2.0
61+
*/
62+
COMMENT_BLOCK("/*");
5163

5264
private final String quotationMark;
5365
public static final List<Quotation> VALUES = List.of(Quotation.values());
@@ -72,6 +84,13 @@ public String getQuotationMark() {
7284
* @return quoted string
7385
*/
7486
public String quoteString(final String str) {
75-
return this.quotationMark + escapeString(str == null ? "" : str, this) + this.quotationMark;
87+
switch (this) {
88+
case COMMENT_LINE:
89+
return COMMENT_LINE.quotationMark + str;
90+
case COMMENT_BLOCK:
91+
return COMMENT_BLOCK.quotationMark + str + "*/";
92+
default:
93+
return this.quotationMark + escapeString(str == null ? "" : str, this) + this.quotationMark;
94+
}
7695
}
7796
}

src/main/java/com/igormaznitsa/prologparser/tokenizer/Tokenizer.java

Lines changed: 58 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
package com.igormaznitsa.prologparser.tokenizer;
2323

2424
import static com.igormaznitsa.prologparser.ParserContext.FLAG_BLOCK_COMMENTS;
25+
import static com.igormaznitsa.prologparser.ParserContext.FLAG_COMMENTS_AS_ATOMS;
2526
import static com.igormaznitsa.prologparser.ParserContext.FLAG_ZERO_QUOTATION_ALLOWS_WHITESPACE_CHAR;
2627
import static com.igormaznitsa.prologparser.ParserContext.FLAG_ZERO_QUOTATION_CHARCODE;
2728
import static com.igormaznitsa.prologparser.tokenizer.TokenizerState.ATOM;
@@ -58,6 +59,7 @@ public final class Tokenizer {
5859
private final StringBuilderEx specCharBuf;
5960
private final StringBuilderEx insideCharBuffer;
6061
private final boolean blockCommentsAllowed;
62+
private final boolean returnCommentsAsToken;
6163
private final boolean zeroSingleQuotationAllowed;
6264
private final boolean zeroQuotationAllowsWhitespaceChar;
6365
private final Reader reader;
@@ -82,6 +84,8 @@ public Tokenizer(final PrologParser parser, final Koi7CharOpMap metaOperators,
8284

8385
final int maxAllowedCharBufferSize = parser.getContext() == null ? Integer.MAX_VALUE :
8486
parser.getContext().getMaxTokenizerBufferLength();
87+
this.returnCommentsAsToken = parser.getContext() != null
88+
&& ((parser.getContext().getFlags() & FLAG_COMMENTS_AS_ATOMS) != 0);
8589
this.blockCommentsAllowed = parser.getContext() != null
8690
&& ((parser.getContext().getFlags() & FLAG_BLOCK_COMMENTS) != 0);
8791
this.zeroSingleQuotationAllowed = parser.getContext() != null
@@ -269,25 +273,47 @@ public void fixPosition() {
269273
this.lastTokenPos = this.pos - 1;
270274
}
271275

272-
private void skipUntilBlockCommentEnd() throws IOException {
276+
private String skipTillBlockCommentEnd(final boolean accumulateText) throws IOException {
277+
final StringBuilder result = accumulateText ? new StringBuilder() : null;
273278
boolean starCharDetected = false;
274279
while (true) {
275280
final int readChar = this.doReadChar();
276-
if (readChar < 0 || (readChar == '/' && starCharDetected)) {
281+
if (readChar < 0) {
277282
break;
283+
} else if (readChar == '/') {
284+
if (starCharDetected) {
285+
if (accumulateText) {
286+
result.setLength(result.length() - 1);
287+
}
288+
break;
289+
} else {
290+
if (accumulateText) {
291+
result.append((char) readChar);
292+
}
293+
}
278294
} else {
279295
starCharDetected = readChar == '*';
296+
if (accumulateText) {
297+
result.append((char) readChar);
298+
}
280299
}
281300
}
301+
return accumulateText ? result.toString() : null;
282302
}
283303

284-
private void skipUntilNextString() throws IOException {
304+
private String skipTillNextLine(final boolean accumulateText) throws IOException {
305+
final StringBuilder result = accumulateText ? new StringBuilder() : null;
306+
285307
while (true) {
286308
final int readChar = this.doReadChar();
287309
if (readChar < 0 || readChar == '\n') {
288310
break;
289311
}
312+
if (accumulateText) {
313+
result.append((char) readChar);
314+
}
290315
}
316+
return accumulateText ? result.toString() : null;
291317
}
292318

293319
public TokenizerResult pop() {
@@ -324,10 +350,10 @@ public TokenizerResult readNextToken() {
324350
final StringBuilderEx strBuffer = this.strBuf;
325351
final StringBuilderEx specCharBuffer = this.specCharBuf;
326352

327-
OpContainer lastFoundFullOperator = null;
353+
final boolean commentsAsAtoms = this.returnCommentsAsToken;
328354

355+
OpContainer lastFoundFullOperator = null;
329356
boolean letterOrDigitOnly = false;
330-
331357
boolean foundUnderscoreInNumber = false;
332358

333359
try {
@@ -416,18 +442,30 @@ public TokenizerResult readNextToken() {
416442

417443
final char chr = (char) readChar;
418444

419-
if (state != STRING && this.blockCommentsAllowed && chr == '*'
420-
&& this.strBuf.isLastChar('/')) {
445+
if (state != STRING
446+
&& this.blockCommentsAllowed
447+
&& chr == '*'
448+
&& this.strBuf.isLastChar('/')
449+
) {
421450
if (this.strBuf.isSingleChar('/')) {
422451
this.strBuf.pop();
423452
state = this.strBuf.isEmpty() ? LOOK_FOR : state;
424453
} else if (state == OPERATOR) {
425454
throw new PrologParserException("Operator can be mixed with comment block: "
426-
+ this.strBuf + chr, getLastTokenLine(), getLastTokenPos());
455+
+ this.strBuf + chr, this.getLastTokenLine(), this.getLastTokenPos());
427456
}
428457

429-
skipUntilBlockCommentEnd();
430-
458+
if (commentsAsAtoms) {
459+
final String commentText = this.skipTillBlockCommentEnd(true);
460+
return new TokenizerResult(
461+
new PrologAtom(commentText, Quotation.COMMENT_BLOCK),
462+
state,
463+
this.getLastTokenLine(),
464+
this.getLastTokenPos()
465+
);
466+
} else {
467+
this.skipTillBlockCommentEnd(false);
468+
}
431469
} else {
432470
switch (state) {
433471
case LOOK_FOR: {
@@ -437,7 +475,16 @@ public TokenizerResult readNextToken() {
437475

438476
switch (chr) {
439477
case '%': {
440-
skipUntilNextString();
478+
this.fixPosition();
479+
final String text = skipTillNextLine(commentsAsAtoms);
480+
if (commentsAsAtoms) {
481+
return new TokenizerResult(
482+
new PrologAtom(text, Quotation.COMMENT_LINE),
483+
state,
484+
this.getLastTokenLine(),
485+
this.getLastTokenPos()
486+
);
487+
}
441488
}
442489
break;
443490
case '_': {

src/main/java/com/igormaznitsa/prologparser/tokenizer/TokenizerState.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,5 +32,7 @@ public enum TokenizerState {
3232
OPERATOR,
3333
VAR,
3434
INTEGER,
35-
FLOAT
35+
FLOAT,
36+
LINE_COMMENT,
37+
BLOCK_COMMENT
3638
}

0 commit comments

Comments
 (0)