Skip to content

Commit 6df491d

Browse files
authored
Merge pull request #9 from SWAT-engineering/better-rascal-grammar
Generate proper precede requirement based on layout follow restriction
2 parents cd31e74 + 654cb88 commit 6df491d

File tree

6 files changed

+164
-16
lines changed

6 files changed

+164
-16
lines changed

rascal-textmate-core/src/main/rascal/lang/oniguruma/Conversion.rsc

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import util::Math;
1313

1414
import lang::oniguruma::RegExp;
1515
import lang::rascal::grammar::Util;
16+
import lang::rascal::grammar::analyze::Symbols;
1617

1718
@synopsis{
1819
Converts a set/list of values (presumably: productions, symbols, or
@@ -25,10 +26,21 @@ list[RegExp] toRegExps(Grammar g, list[value] values)
2526
= [toRegExp(g, v) | v <- values];
2627
2728
@synopsis{
28-
Converts a production to a regular expression.
29+
Converts a production to a regular expression, optionally with a
30+
grammar-dependent `\precede` guard (enabled with `guard = true`; default: `false`).
2931
}
3032
31-
RegExp toRegExp(Grammar g, prod(_, symbols, attributes)) {
33+
RegExp toRegExp(Grammar g, prod(def, symbols, attributes), bool guard = false) {
34+
if (guard && delabel(def) in g.rules && {\conditional(_, conditions)} := precede(g, def)) { // Only when requested, `def` is defined in `g`, and its precede set is exactly one conditional symbol
35+
set[Symbol] alternatives
36+
= {s | \not-follow(s) <- conditions} // Symbols taken from the `\not-follow` conditions of that conditional...
37+
+ {\conditional(\empty(), {\begin-of-line()})}; // ...plus: the empty symbol at the begin of a line
38+
39+
Condition guard = \precede(\alt(alternatives)); // Note: this local shadows the `bool guard` keyword parameter
40+
Symbol guarded = \conditional(\seq(symbols), {guard}); // The original symbols, as one sequence, under the precede condition
41+
return toRegExp(g, prod(def, [guarded], attributes)); // Recursive call without `guard`, so it defaults to `false`
42+
}
43+
3244
RegExp re = infix("", toRegExps(g, symbols)); // Empty separator for concatenation
3345
return /\tag("category"(c)) := attributes ? group(re, category = c) : re; // Wrap in a group with category `c` if a "category" tag occurs in the attributes
3446
}
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
@synopsis{
2+
Types and functions to analyze symbols
3+
}
4+
5+
@description{
6+
Note: Some functions in this module seemingly overlap with those in module
7+
`lang::rascal::grammar::Lookahead` (i.e., computation of first/follow sets).
8+
However, only symbols of the form `\char-class(_)` are considered terminals
9+
in that module, which is too strict for the purpose of this project.
10+
}
11+
12+
// TODO: The analysis of delimiters in module
13+
// `lang::rascal::grammar::analyze::Delimiters` can probably be rewritten (less
14+
// code) to use functions in this module.
15+
16+
module lang::rascal::grammar::analyze::Symbols
17+
18+
import Grammar;
19+
import ParseTree;
20+
import util::Maybe;
21+
22+
import lang::rascal::grammar::Util;
23+
24+
@synopsis{
25+
Representation of a traversal direction along a list of symbols
26+
}
27+
28+
data Direction // Traverse lists of symbols (in productions)...
29+
= forward() // - ...from left to right;
30+
| backward() // - ...from right to left.
31+
;
32+
33+
private list[&T] reorder(list[&T] l, forward()) = l; // Forward: keep the list as is
34+
private list[&T] reorder(list[&T] l, backward()) = reverse(l); // Backward: reversed list. NOTE(review): presumably `List::reverse`; no `import List;` is visible in this module -- confirm
35+
36+
@synopsis{
37+
Computes the *last* set of symbol `s` in grammar `g`
38+
}
39+
40+
set[Symbol] last(Grammar g, Symbol s)
41+
= unmaybe(firstBySymbol(g, isTerminal, backward())[delabel(s)]); // Same computation as `first`, but traversing productions from right to left
42+
43+
@synopsis{
44+
Computes the *first* set of symbol `s` in grammar `g`
45+
}
46+
47+
set[Symbol] first(Grammar g, Symbol s)
48+
= unmaybe(firstBySymbol(g, isTerminal, forward())[delabel(s)]); // Looks up the unlabeled `s` in the map computed for the whole grammar (left to right)
49+
50+
@memo
51+
private map[Symbol, Maybe[set[Symbol]]] firstBySymbol(Grammar g, bool(Symbol) predicate, Direction dir) { // Maps each symbol of `g` to its first set (`dir` forward) or last set (`dir` backward)
52+
map[Symbol, Maybe[set[Symbol]]] ret // `nothing()` marks a symbol whose set has not been computed yet
53+
= (delabel(s): nothing() | s <- g.rules) // Non-terminals
54+
+ (delabel(s): nothing() | /prod(_, [*_, s, *_], _) := g, !isNonTerminalType(s)); // Terminals
55+
56+
Maybe[set[Symbol]] firstOf([]) // No symbols: empty set
57+
= just({});
58+
Maybe[set[Symbol]] firstOf([h, *t])
59+
= \set: just({\empty(), *_}) := ret[delabel(h)] // If the set of `h` is known and contains `\empty()`...
60+
? union(\set, firstOf(t)) // ...then also include the set of the remaining symbols;
61+
: ret[delabel(h)]; // ...else, only the set of `h` (possibly still `nothing()`)
62+
63+
solve (ret) { // Repeat until `ret` no longer changes
64+
for (s <- ret, nothing() == ret[s]) { // Only symbols that are still unknown
65+
if (predicate(s)) {
66+
ret[s] = just({s}); // A symbol that satisfies `predicate` is its own set
67+
} else if (list[Production] prods: [_, *_] := lookup(g, s)) { // At least one production is found for `s`
68+
ret[s] = (just({}) | union(it, firstOf(reorder(p.symbols, dir))) | p <- prods); // Union over all productions; `nothing()` if any part is still unknown
69+
} else {
70+
ret[s] = just({\empty()}); // No productions found: only `\empty()`
71+
}
72+
}
73+
}
74+
75+
return ret;
76+
}
77+
78+
@synopsis{
79+
Computes the *precede* set of symbol `s` in grammar `g`
80+
}
81+
82+
set[Symbol] precede(Grammar g, Symbol s)
83+
= unmaybe(followBySymbol(g, isTerminal, backward())[delabel(s)]); // Same computation as `follow`, but traversing productions from right to left
84+
85+
@synopsis{
86+
Computes the *follow* set of symbol `s` in grammar `g`
87+
}
88+
89+
set[Symbol] follow(Grammar g, Symbol s)
90+
= unmaybe(followBySymbol(g, isTerminal, forward())[delabel(s)]); // Looks up the unlabeled `s` in the map computed for the whole grammar (left to right)
91+
92+
@memo
93+
private map[Symbol, Maybe[set[Symbol]]] followBySymbol(Grammar g, bool(Symbol) predicate, Direction dir) { // Maps each non-terminal of `g` to its follow set (`dir` forward) or precede set (`dir` backward)
94+
map[Symbol, Maybe[set[Symbol]]] ret = (delabel(s): nothing() | s <- g.rules); // Non-terminals
95+
96+
Maybe[set[Symbol]] followOf(Symbol parent, []) // No symbols left after the occurrence: use the current set of `parent`
97+
= ret[delabel(parent)];
98+
Maybe[set[Symbol]] followOf(Symbol parent, [h, *t])
99+
= just({\empty(), *rest}) := firstBySymbol(g, predicate, dir)[delabel(h)] // If the set of `h` (from `firstBySymbol`) contains `\empty()`...
100+
? union(just(rest), followOf(parent, t)) // ...then drop `\empty()` and continue with the remaining symbols;
101+
: firstBySymbol(g, predicate, dir)[delabel(h)]; // ...else, only the set of `h`
102+
103+
solve (ret) { // Repeat until `ret` no longer changes
104+
for (s <- ret, nothing() == ret[s]) { // Only symbols that are still unknown
105+
ret[s] = just({}); // Start empty, then accumulate over all occurrences of `s`
106+
for (/prod(def, symbols, _) := g, [*_, t, *after] := reorder(symbols, dir), s == delabel(t)) { // Each occurrence `t` of `s` in a production of `def`, with the symbols `after` it (in direction `dir`)
107+
ret[s] = union(ret[s], followOf(def, after)); // Becomes `nothing()` again if the contribution is still unknown
108+
}
109+
}
110+
}
111+
112+
return ret;
113+
}
114+
115+
private set[Symbol] unmaybe(just(set[Symbol] \set)) // Present: unwrap the set
116+
= \set;
117+
private set[Symbol] unmaybe(nothing()) // Absent: treated as the empty set
118+
= {};
119+
120+
private Maybe[set[Symbol]] union(just(set[Symbol] \set1), just(set[Symbol] \set2)) // Both present: plain set union
121+
= just(\set1 + \set2);
122+
private default Maybe[set[Symbol]] union(Maybe[set[Symbol]] _, Maybe[set[Symbol]] _) // Otherwise (at least one `nothing()`): the result is `nothing()`
123+
= nothing();
124+
125+
@synopsis{
126+
Checks if symbol `s` is a terminal
127+
}
128+
129+
bool isTerminal(Symbol s)
130+
= !isNonTerminalType(s); // Every symbol that is not a non-terminal type counts as a terminal

rascal-textmate-core/src/main/rascal/lang/textmate/Conversion.rsc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ TmRule toTmRule(ConversionUnit u, NameGenerator g)
225225
private TmRule toTmRule(RscGrammar rsc, p: prod(def, _, _), str name)
226226
= !isSynthetic(def) && <just(begin), just(end)> := getOuterDelimiterPair(rsc, p)
227227
? toTmRule(toRegExp(rsc, begin), toRegExp(rsc, end), "<begin.string><end.string>", [toTmRule(toRegExp(rsc, p), name)])
228-
: toTmRule(toRegExp(rsc, p), name);
228+
: toTmRule(toRegExp(rsc, p, guard = true), name);
229229
230230
private TmRule toTmRule(RegExp re, str name)
231231
= match(re.string, captures = toCaptures(re.categories), name = name);

rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/PicoWithCategories.rsc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ syntax Expression
4141
;
4242

4343
lexical Id = [a-z][a-z0-9]* !>> [a-z0-9];
44-
lexical Natural = [0-9]+;
44+
lexical Natural = [0-9]+ !>> [0-9];
4545
lexical String = "\"" ![\"]* "\"";
4646

4747
layout Layout = WhitespaceAndComment* !>> [\ \t\n\r%];

rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/PicoWithCategories.test

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,12 @@
2121
# ^^^ variable.other
2222
# ^ -variable.other
2323
# ^ -constant.numeric
24+
# ^^^ -constant.numeric
25+
26+
foo 123
27+
# ^^^ variable.other
28+
# ^ -variable.other
29+
# ^ -constant.numeric
2430
# ^^^ constant.numeric
2531

2632
natural: natural;

0 commit comments

Comments
 (0)