Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 8 additions & 12 deletions rascal-textmate-core/src/main/rascal/lang/rascal/grammar/Util.rsc
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,6 @@ bool tryParse(Grammar g, Symbol s, str input, bool allowAmbiguity = false) {
return false;
}

@synopsis{
    Gets the terminals that occur in production `p`, possibly recursively
    (default: `true`)
}

@description{
    When `recur` is `true`, the terminals of every production reachable from
    `p` (through `lookup` on the symbols of each visited production) are
    included as well. When `recur` is `false`, only the symbols that occur
    directly in `p` are considered.
}

set[Symbol] getTerminals(Grammar g, Production p, bool recur = true) {
    // Collect the reachable productions with a fixed-point computation
    // instead of plain recursion. With plain recursion, a production that
    // (directly or indirectly) refers to itself is visited over and over
    // again, so the function never returns for recursive grammars. Here, a
    // production that was already collected doesn't change the set, so the
    // computation stops once no new productions are found.
    set[Production] reachable = {p};
    if (recur) {
        solve (reachable) {
            reachable += {child | q <- reachable, s <- q.symbols, child <- lookup(g, s)};
        }
    }

    // A symbol counts as a terminal when it isn't a non-terminal
    return {s | q <- reachable, s <- q.symbols, !isNonTerminalType(s)};
}

@synopsis{
Looks up a list of productions for symbol `s` in grammar `g`, replacing
formal parameters with actual parameters when needed
Expand Down Expand Up @@ -84,21 +75,26 @@ Symbol expand(\iter-star-seps(symbol, separators))
Removes the label from symbol `s`, if any
}

// NOTE(review): this diff view shows two versions of `delabel` back to back
// without +/- markers; presumably the first pair is the removed version and
// the second pair is the added version -- confirm against the actual file.

// First pair: strips exactly one label and returns the symbol underneath it
// as is (a label nested directly below it would remain).
Symbol delabel(label(_, Symbol s)) = s;
default Symbol delabel(Symbol s) = s;
// Second pair: strips the label and recurs on the symbol underneath it, so
// directly nested labels are removed as well. Unlabeled symbols are returned
// unchanged by the `default` alternative.
Symbol delabel(\label(_, Symbol s)) = delabel(s);
default Symbol delabel(Symbol s) = s;

@synopsis{
Removes operators `?` and `*` from symbol `s`, if any
}

// NOTE(review): the next two lines look like the pre-change and post-change
// signature of the same alternative in this diff view (they differ only in
// the `\` escape of `label`); both are followed by a single body -- confirm
// against the actual file.

// Labeled symbol: keep the label, rewrite the symbol underneath it
Symbol destar(label(name, symbol))
Symbol destar(\label(name, symbol))
= label(name, destar(symbol));

// Optional symbol: drop the `?` and continue with the symbol underneath it
Symbol destar(\opt(symbol))
= destar(symbol);
// Zero-or-more iteration: becomes a one-or-more iteration
Symbol destar(\iter-star(symbol))
= \iter(destar(symbol));
// Zero-or-more iteration with separators: becomes a one-or-more iteration
// with the same separators
Symbol destar(\iter-star-seps(symbol, separators))
= \iter-seps(destar(symbol), separators);
// Sequence of exactly one symbol: rewrite that symbol. Sequences of any other
// length don't match this pattern and are handled by the `default`
// alternative below (i.e., returned unchanged).
Symbol destar(\seq([symbol]))
= \seq([destar(symbol)]);
// Alternative of exactly one symbol: rewrite that symbol. Alternatives of any
// other size don't match this pattern and are handled by the `default`
// alternative below (i.e., returned unchanged).
Symbol destar(\alt({symbol}))
= \alt({destar(symbol)});

// Any other symbol is returned unchanged
default Symbol destar(Symbol s) = s;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,129 @@ module lang::rascal::grammar::analyze::Newlines
import Grammar;
import ParseTree;
import String;
import util::Maybe;

import lang::rascal::grammar::Util;
import util::MaybeUtil;

@synopsis{
Representation of a *newline-free* segment of symbols
}

alias Segment = list[Symbol];

@synopsis{
Gets the (newline-free) segments of a production/list of symbols in grammar
`g`, separated by symbols that have a newline (not part of any segment),
recursively for non-terminals. For instance, the segments of
`[lit("foo"), lit("bar"), lit("\n"), lit("baz")]` are:
- `[lit("foo"), lit("bar")]`;
- `[lit("baz")]`.
}

// Segments of production `p`: read from the memoized per-production table of
// grammar `g`. An entry that is still `nothing()` yields the empty set.
set[Segment] getSegments(Grammar g, Production p) {
    map[Production, Maybe[set[Segment]]] table = getSegmentsByProduction(g);
    return unmaybe(table[p]);
}

// Segments of an arbitrary list of symbols: computed on demand, using the
// per-production table of grammar `g` as the environment to resolve
// non-terminals.
set[Segment] getSegments(Grammar g, list[Symbol] symbols)
    = unmaybe(getSegmentsWithEnvironment(g, symbols, getSegmentsByProduction(g)));

// Computes, for each production in grammar `g`, its set of segments (or
// `nothing()` when it couldn't be determined). The result is memoized (see
// the `@memo` tag), so repeated calls with the same grammar reuse the table.
@memo
private map[Production, Maybe[set[Segment]]] getSegmentsByProduction(Grammar g) {
// Initially, the segments of every production found anywhere in `g` are
// unknown
map[Production, Maybe[set[Segment]]] ret = (p : nothing() | /p: prod(_, _, _) := g);

// Fixed-point computation: keep iterating until `ret` no longer changes. In
// each round, only the productions that are still unknown are (re)computed,
// using the current contents of `ret` as the environment.
solve (ret) {
for (p <- ret, nothing() == ret[p]) {
ret[p] = getSegmentsWithEnvironment(g, p.symbols, ret);
}
}

return ret;
}

// Computes the segments of `symbols` in grammar `g`. Segments of productions
// of non-terminals are not computed here, but looked up in `env`. Because the
// partial results are combined with `union`, the overall result is presumably
// `nothing()` as soon as one of the looked-up entries is `nothing()` (this
// depends on which `union` is in scope -- TODO confirm).
private Maybe[set[Segment]] getSegmentsWithEnvironment(
Grammar g, list[Symbol] symbols,
map[Production, Maybe[set[Segment]]] env) {

// General idea: Recursively traverse `symbols` from left to right, while
// keeping track of a "running segment" (initially empty). Each time a
// symbol that has a newline is encountered, finish/collect the running
// segment, and start a new one for the remainder of `symbols`.

// Final case: No symbols remaining
Maybe[set[Segment]] get(Segment runningSegment, []) {
// An empty running segment is not collected
return just(_ <- runningSegment ? {runningSegment} : {});
}

// Recursive case: At least one symbol remaining
Maybe[set[Segment]] get(Segment segment, [Symbol head, *Symbol tail]) {
// All symbols that occur in the head, at any depth (including the head
// itself)
set[Symbol] nested = {s | /Symbol s := head};

// If the head contains a non-terminal, then: (1) finish the running
// segment; (2) lookup the segments of the non-terminals in the
// environment, if any; (3) compute the segments of the tail. Return the
// union of 1-3.
if (any(s <- nested, isNonTerminalType(s))) {

// NOTE(review): `env[p]` is not guarded; this assumes every production
// returned by `lookup` is a key of `env` -- confirm, in particular for
// productions that `lookup` may have instantiated with actual parameters
list[Maybe[set[Segment]]] sets
= [get(segment, [])] // (1)
+ [env[p] | s <- nested, isNonTerminalType(s), p <- lookup(g, s)] // (2)
+ [get([], tail)]; // (3)

// Fold `union` over all partial results, starting from (1)
return (sets[0] | union(it, \set) | \set <- sets[1..]);

}

// If the head doesn't contain a non-terminal, but it has a newline,
// then: (1) finish the running segment; (2) compute the segments of the
// tail. Return the union of 1-2. Note: the head is ignored and won't be
// part of any segment.
else if (any(s <- nested, hasNewline(g, s))) {
return union(get(segment, []), get([], tail));
}

// If the head doesn't contain a non-terminal, and if it doesn't have a
// newline, then add the head to the running segment and proceed with
// the tail.
else {
return get(segment + head, tail);
}
}

// Start with an empty running segment
return get([], symbols);
}

@synopsis{
Checks if a symbol has a newline character
}

bool hasNewline(Grammar g, Symbol s) {
    // Strip the label (if any) before looking up the productions of `s`; the
    // symbol has a newline as soon as one of those productions has one
    for (p <- lookup(g, delabel(s))) {
        if (hasNewline(g, p)) {
            return true;
        }
    }
    return false;
}

@synopsis{
Checks if a production has a newline character
}

// NOTE(review): this diff view shows two versions of `hasNewline` for
// productions back to back without +/- markers, sharing one closing brace;
// presumably the first header and body are the removed version and the second
// header and body are the added version -- confirm against the actual file.

// First version: checks the production directly (any character range in its
// symbols that has a newline), or otherwise recurs into the productions of
// every non-terminal that occurs in its symbols.
bool hasNewline(Grammar g, prod(_, symbols, _)) {
set[Symbol] nonTerminals = {s | /Symbol s := symbols, isNonTerminalType(s)};
return any(/r: range(_, _) := symbols, hasNewline(r)) ||
any(s <- nonTerminals, Production p <- lookup(g, s), hasNewline(g, p));
// Second version: reads the answer from the memoized per-production table of
// grammar `g`.
bool hasNewline(Grammar g, Production p) {
return hasNewlineByProduction(g)[p];
}

// Computes, for each production in grammar `g`, whether it has a newline
// character. The result is memoized (see the `@memo` tag).
@memo
private map[Production, bool] hasNewlineByProduction(Grammar g) {
    // Initially, no production found in `g` is known to have a newline
    map[Production, bool] table = (p: false | /p: prod(_, _, _) := g);

    // Fixed-point computation: keep iterating until `table` no longer
    // changes. Only productions that are still `false` are reconsidered.
    solve (table) {
        for (p <- table, !table[p]) {
            if (any(/r: range(_, _) := p.symbols, hasNewline(r))) {
                // Direct: a character range in the production has a newline
                table[p] = true;
            } else {
                // Indirect: a production of a non-terminal that occurs in
                // this production is already known to have a newline
                set[Symbol] nonTerminals = {s | /Symbol s := p.symbols, isNonTerminalType(s)};
                table[p] = any(s <- nonTerminals, Production child <- lookup(g, s), table[child]);
            }
        }
    }

    return table;
}

@synopsis{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import ParseTree;
import util::Maybe;

import lang::rascal::grammar::Util;
import util::MaybeUtil;

@synopsis{
Representation of a traversal direction along a list of symbols
Expand Down Expand Up @@ -112,16 +113,6 @@ private map[Symbol, Maybe[set[Symbol]]] followBySymbol(Grammar g, bool(Symbol) p
return ret;
}

// Present: unwrap the set of symbols
private set[Symbol] unmaybe(just(set[Symbol] symbols))
    = symbols;
// Absent: fall back to the empty set
private set[Symbol] unmaybe(nothing())
    = {};

// Both present: just the union of the two sets of symbols
private Maybe[set[Symbol]] union(just(set[Symbol] lhs), just(set[Symbol] rhs))
    = just(lhs + rhs);
// Otherwise (at least one absent): nothing
private default Maybe[set[Symbol]] union(Maybe[set[Symbol]] _, Maybe[set[Symbol]] _)
    = nothing();

@synopsis{
Checks if symbol `s` is a terminal
}
Expand Down
14 changes: 10 additions & 4 deletions rascal-textmate-core/src/main/rascal/lang/textmate/Conversion.rsc
Original file line number Diff line number Diff line change
Expand Up @@ -215,11 +215,17 @@ private list[ConversionUnit] addInnerRules(list[ConversionUnit] units) {
// Simple case: each unit does have an `end` inner delimiter
if (_ <- group && all(u <- group, just(_) := u.innerDelimiters.end)) {

// Compute a list of terminals that need to be consumed between
// Compute a list of segments that need to be consumed between
// the `begin` delimiter and the `end` delimiters. Each of these
// terminals will be converted to a match pattern.
list[Symbol] terminals = [*getTerminals(rsc, u.prod) | u <- group];
terminals = [s | s <- terminals, s notin begins && s notin ends];
// segments will be converted to a match pattern.
set[list[Symbol]] segments = {*getSegments(rsc, u.prod) | u <- group};

list[Symbol] terminals
= [\seq([ *ys ]) | [x, *ys, z] <- segments, x == begin, z in ends]
+ [\seq([ *ys, z]) | [x, *ys, z] <- segments, x == begin, z notin ends]
+ [\seq([x, *ys ]) | [x, *ys, z] <- segments, x != begin, z in ends]
+ [\seq([x, *ys, z]) | [x, *ys, z] <- segments, x != begin, z notin ends];

terminals = [destar(s) | s <- terminals]; // The tokenization engine always tries to apply rules repeatedly
terminals = dup(terminals);
terminals = terminals + \char-class([range(1,0x10FFFF)]); // Any char (as a fallback)
Expand Down
28 changes: 28 additions & 0 deletions rascal-textmate-core/src/main/rascal/util/MaybeUtil.rsc
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
@synopsis{
Utility functions for `Maybe` values
}

module util::MaybeUtil

import util::Maybe;

@synopsis{
    Unwraps a `Maybe` value that holds a set: returns the set when it is
    present, and the empty set when it is absent.
}

set[&T] unmaybe(Maybe[set[&T]] _: just(set[&T] elements))
    = elements;
set[&T] unmaybe(Maybe[set[&T]] _: nothing())
    = {};

@synopsis{
    Combines two `Maybe` values that hold sets: returns just the union of the
    two sets when both are present, and nothing when either is absent.
}

Maybe[set[&T]] union(just(set[&T] lhs), just(set[&T] rhs))
    = just(lhs + rhs);

default Maybe[set[&T]] union(Maybe[set[&T]] _, Maybe[set[&T]] _)
    = nothing();
2 changes: 1 addition & 1 deletion vscode-extension/syntaxes/pico.tmLanguage.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
"end": "(\\\")",
"patterns": [
{
"match": "((?:\\\")[\\x{01}-\\!\\#-\\x{10FFFF}]*?(?:\\\"))",
"match": "([\\x{01}-\\!\\#-\\x{10FFFF}]+?)",
"captures": {
"1": {
"name": "string.quoted.double"
Expand Down
96 changes: 92 additions & 4 deletions vscode-extension/syntaxes/rascal.tmLanguage.json
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,39 @@
"end": "((?:\\\")|(?:\\<))",
"patterns": [
{
"match": "((?:(?:(?:\\\\)[\\\"\\'\\<\\>\\\\bfnrt])|[\\x{01}-\\!\\#-\\&\\(-\\;\\=\\?-\\[\\]-\\x{10FFFF}]|(?:(?:\\n)[\\t\\x{20}\\x{A0}\\x{1680}\\x{2000}-\\x{200A}\\x{202F}\\x{205F}\\x{3000}]*?(?:\\'))|(?:(?:(?:\\\\)U(?:(?:\\b10\\b)|(?:(?:\\b0\\b)[0-9A-Fa-f]))[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f])|(?:(?:\\\\)u[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f])|(?:(?:\\\\)a[0-7][0-9A-Fa-f])))+?)",
"match": "((?:\\\\)U(?:(?:\\b10\\b)|(?:(?:\\b0\\b)[0-9A-Fa-f]))[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f])",
"captures": {
"1": {
"name": "string.quoted.double"
}
}
},
{
"match": "((?:\\n)[\\t\\x{20}\\x{A0}\\x{1680}\\x{2000}-\\x{200A}\\x{202F}\\x{205F}\\x{3000}]*?(?:\\'))",
"captures": {
"1": {
"name": "string.quoted.double"
}
}
},
{
"match": "((?:\\\\)[\\\"\\'\\<\\>\\\\bfnrt])",
"captures": {
"1": {
"name": "string.quoted.double"
}
}
},
{
"match": "((?:\\\\)a[0-7][0-9A-Fa-f])",
"captures": {
"1": {
"name": "string.quoted.double"
}
}
},
{
"match": "((?:\\\\)u[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f])",
"captures": {
"1": {
"name": "string.quoted.double"
Expand Down Expand Up @@ -254,15 +286,39 @@
"end": "((?:\\\")|(?:\\<))",
"patterns": [
{
"match": "((?:(?:(?:\\\\)[\\\"\\'\\<\\>\\\\bfnrt])|[\\x{01}-\\!\\#-\\&\\(-\\;\\=\\?-\\[\\]-\\x{10FFFF}]|(?:(?:\\n)[\\t\\x{20}\\x{A0}\\x{1680}\\x{2000}-\\x{200A}\\x{202F}\\x{205F}\\x{3000}]*?(?:\\'))|(?:(?:(?:\\\\)U(?:(?:\\b10\\b)|(?:(?:\\b0\\b)[0-9A-Fa-f]))[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f])|(?:(?:\\\\)u[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f])|(?:(?:\\\\)a[0-7][0-9A-Fa-f])))+?)",
"match": "((?:\\\\)U(?:(?:\\b10\\b)|(?:(?:\\b0\\b)[0-9A-Fa-f]))[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f])",
"captures": {
"1": {
"name": "string.quoted.double"
}
}
},
{
"match": "((?:(?:(?:\\\\)[\\\"\\'\\<\\>\\\\bfnrt])|[\\x{01}-\\!\\#-\\&\\(-\\;\\=\\?-\\[\\]-\\x{10FFFF}]|(?:(?:\\n)[\\t\\x{20}\\x{A0}\\x{1680}\\x{2000}-\\x{200A}\\x{202F}\\x{205F}\\x{3000}]*?(?:\\'))|(?:(?:(?:\\\\)U(?:(?:\\b10\\b)|(?:(?:\\b0\\b)[0-9A-Fa-f]))[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f])|(?:(?:\\\\)u[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f])|(?:(?:\\\\)a[0-7][0-9A-Fa-f])))+?)",
"match": "((?:\\n)[\\t\\x{20}\\x{A0}\\x{1680}\\x{2000}-\\x{200A}\\x{202F}\\x{205F}\\x{3000}]*?(?:\\'))",
"captures": {
"1": {
"name": "string.quoted.double"
}
}
},
{
"match": "((?:\\\\)[\\\"\\'\\<\\>\\\\bfnrt])",
"captures": {
"1": {
"name": "string.quoted.double"
}
}
},
{
"match": "((?:\\\\)a[0-7][0-9A-Fa-f])",
"captures": {
"1": {
"name": "string.quoted.double"
}
}
},
{
"match": "((?:\\\\)u[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f])",
"captures": {
"1": {
"name": "string.quoted.double"
Expand Down Expand Up @@ -477,7 +533,39 @@
"end": "(\\')",
"patterns": [
{
"match": "((?:(?:(?:\\\\)[\\\"\\'\\<\\>\\\\bfnrt])|[\\x{01}-\\!\\#-\\&\\(-\\;\\=\\?-\\[\\]-\\x{10FFFF}]|(?:(?:\\n)[\\t\\x{20}\\x{A0}\\x{1680}\\x{2000}-\\x{200A}\\x{202F}\\x{205F}\\x{3000}]*?(?:\\'))|(?:(?:(?:\\\\)U(?:(?:\\b10\\b)|(?:(?:\\b0\\b)[0-9A-Fa-f]))[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f])|(?:(?:\\\\)u[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f])|(?:(?:\\\\)a[0-7][0-9A-Fa-f])))+?)",
"match": "((?:\\n)[\\t\\x{20}\\x{A0}\\x{1680}\\x{2000}-\\x{200A}\\x{202F}\\x{205F}\\x{3000}]*?)",
"captures": {
"1": {
"name": "string.quoted.single"
}
}
},
{
"match": "((?:\\\\)U(?:(?:\\b10\\b)|(?:(?:\\b0\\b)[0-9A-Fa-f]))[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f])",
"captures": {
"1": {
"name": "string.quoted.single"
}
}
},
{
"match": "((?:\\\\)[\\\"\\'\\<\\>\\\\bfnrt])",
"captures": {
"1": {
"name": "string.quoted.single"
}
}
},
{
"match": "((?:\\\\)a[0-7][0-9A-Fa-f])",
"captures": {
"1": {
"name": "string.quoted.single"
}
}
},
{
"match": "((?:\\\\)u[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f])",
"captures": {
"1": {
"name": "string.quoted.single"
Expand Down