From 6af5e8bca7201d0808ee6e44c9ce914b1a104c0d Mon Sep 17 00:00:00 2001 From: thePanz Date: Wed, 4 Apr 2018 20:01:32 +0200 Subject: [PATCH 1/6] Add support for range [a TO b] --- lib/Languages/Galach/TokenExtractor/Full.php | 9 ++++ lib/Languages/Galach/Tokenizer.php | 1 + lib/Languages/Galach/Values/Token/Range.php | 47 ++++++++++++++++++++ tests/Galach/Tokenizer/FullTokenizerTest.php | 7 +++ tests/Galach/Tokenizer/TextTokenizerTest.php | 7 +++ 5 files changed, 71 insertions(+) create mode 100644 lib/Languages/Galach/Values/Token/Range.php diff --git a/lib/Languages/Galach/TokenExtractor/Full.php b/lib/Languages/Galach/TokenExtractor/Full.php index 6733b39..5683290 100644 --- a/lib/Languages/Galach/TokenExtractor/Full.php +++ b/lib/Languages/Galach/TokenExtractor/Full.php @@ -5,6 +5,7 @@ use QueryTranslator\Languages\Galach\TokenExtractor; use QueryTranslator\Languages\Galach\Tokenizer; use QueryTranslator\Languages\Galach\Values\Token\Phrase; +use QueryTranslator\Languages\Galach\Values\Token\Range; use QueryTranslator\Languages\Galach\Values\Token\Tag; use QueryTranslator\Languages\Galach\Values\Token\User; use QueryTranslator\Languages\Galach\Values\Token\Word; @@ -35,6 +36,7 @@ final class Full extends TokenExtractor '/(?(?:(?(?[a-zA-Z0-9_][a-zA-Z0-9_\-.]*)))(?:[\s"()+!]|$)/Au' => Tokenizer::TOKEN_TERM, '/(?(?:(?(?[a-zA-Z0-9_][a-zA-Z0-9_\-.]*)))(?:[\s"()+!]|$)/Au' => Tokenizer::TOKEN_TERM, '/(?(?:(?[a-zA-Z_][a-zA-Z0-9_\-.]*):)?(?(?.*?)(?:(? Tokenizer::TOKEN_TERM, + '/(?(?:(?[a-zA-Z_][a-zA-Z0-9_\-.]*):)?\[(?[a-zA-Z0-9]+) TO (?[a-zA-Z0-9]+)\])/Aus' => Tokenizer::TOKEN_TERM, '/(?(?:(?[a-zA-Z_][a-zA-Z0-9_\-.]*):)?(?(?:\\\\\\\\|\\\\ |\\\\\(|\\\\\)|\\\\"|[^"()\s])+?))(?:(? Tokenizer::TOKEN_TERM, ]; @@ -48,6 +50,13 @@ protected function createTermToken($position, array $data) $lexeme = $data['lexeme']; switch (true) { + case isset($data['rangeFrom']) && isset($data['rangeTo']): + return new Range( + $lexeme, + $position, + $data['domain'], + $data['rangeFrom'], $data['rangeTo'] + ); case isset($data['word']): return new Word( $lexeme, diff --git a/lib/Languages/Galach/Tokenizer.php b/lib/Languages/Galach/Tokenizer.php index f87438e..325464e 100644 --- a/lib/Languages/Galach/Tokenizer.php +++ b/lib/Languages/Galach/Tokenizer.php @@ -78,6 +78,7 @@ final class Tokenizer implements Tokenizing * @see \QueryTranslator\Languages\Galach\Values\Token\Tag * @see \QueryTranslator\Languages\Galach\Values\Token\User * @see \QueryTranslator\Languages\Galach\Values\Token\Word + * @see \QueryTranslator\Languages\Galach\Values\Token\Range */ const TOKEN_TERM = 512; diff --git a/lib/Languages/Galach/Values/Token/Range.php b/lib/Languages/Galach/Values/Token/Range.php new file mode 100644 index 0000000..171518e --- /dev/null +++ b/lib/Languages/Galach/Values/Token/Range.php @@ -0,0 +1,47 @@ +domain = $domain; + $this->rangeFrom = $rangeFrom; + $this->rangeTo = $rangeTo; + } +} diff --git a/tests/Galach/Tokenizer/FullTokenizerTest.php b/tests/Galach/Tokenizer/FullTokenizerTest.php index 55c3dfb..c050e69 100644 --- a/tests/Galach/Tokenizer/FullTokenizerTest.php +++ b/tests/Galach/Tokenizer/FullTokenizerTest.php @@ -8,6 +8,7 @@ use QueryTranslator\Languages\Galach\Values\Token\GroupBegin as GroupBeginToken; use QueryTranslator\Languages\Galach\Values\Token\GroupBegin; use QueryTranslator\Languages\Galach\Values\Token\Phrase as PhraseToken; +use QueryTranslator\Languages\Galach\Values\Token\Range as RangeToken; use QueryTranslator\Languages\Galach\Values\Token\Tag as TagToken; use QueryTranslator\Languages\Galach\Values\Token\User as UserToken; use QueryTranslator\Languages\Galach\Values\Token\Word as WordToken; @@ -112,6 +113,12 @@ public function providerForTestTokenize() new WordToken('word\\ word', 0, '', 'word word'), ], ], + [ + '[a TO b]', + [ + new RangeToken('[a TO b]', 0, '', 'a', 'b'), + ], + ], [ '"phrase"', [ diff --git a/tests/Galach/Tokenizer/TextTokenizerTest.php b/tests/Galach/Tokenizer/TextTokenizerTest.php index 8fb2eaa..60c1c88 100644 --- a/tests/Galach/Tokenizer/TextTokenizerTest.php +++ b/tests/Galach/Tokenizer/TextTokenizerTest.php @@ -95,6 +95,13 @@ public static function setUpBeforeClass() new WordToken('@user', 0, '', '@user'), new Token(Tokenizer::TOKEN_GROUP_END, ')', 5), ], + '[a TO b]' => [ + new WordToken('[a', 0, '', '[a'), + new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 2), + new WordToken('TO', 3, '', 'TO'), + new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 5), + new WordToken('b]', 6, '', 'b]'), + ], 'domain:domain:' => [ new WordToken('domain:domain:', 0, '', 'domain:domain:'), ], From 285543d223f936fffca5cdfcb3ce2d009011374e Mon Sep 17 00:00:00 2001 From: thePanz Date: Thu, 5 Apr 2018 10:45:06 +0200 Subject: [PATCH 2/6] Added inclusive and exclusive range tokenization --- lib/Languages/Galach/TokenExtractor/Full.php | 7 ++- lib/Languages/Galach/Values/Token/Range.php | 38 +++++++++++- tests/Galach/Tokenizer/FullTokenizerTest.php | 8 ++- tests/Galach/Tokenizer/TextTokenizerTest.php | 7 +++ tests/Galach/Values/Token/RangeTest.php | 65 ++++++++++++++++++++ 5 files changed, 119 insertions(+), 6 deletions(-) create mode 100644 tests/Galach/Values/Token/RangeTest.php diff --git a/lib/Languages/Galach/TokenExtractor/Full.php b/lib/Languages/Galach/TokenExtractor/Full.php index 5683290..3de88d7 100644 --- a/lib/Languages/Galach/TokenExtractor/Full.php +++ b/lib/Languages/Galach/TokenExtractor/Full.php @@ -36,7 +36,7 @@ final class Full extends TokenExtractor '/(?(?:(?(?[a-zA-Z0-9_][a-zA-Z0-9_\-.]*)))(?:[\s"()+!]|$)/Au' => Tokenizer::TOKEN_TERM, '/(?(?:(?(?[a-zA-Z0-9_][a-zA-Z0-9_\-.]*)))(?:[\s"()+!]|$)/Au' => Tokenizer::TOKEN_TERM, '/(?(?:(?[a-zA-Z_][a-zA-Z0-9_\-.]*):)?(?(?.*?)(?:(? Tokenizer::TOKEN_TERM, - '/(?(?:(?[a-zA-Z_][a-zA-Z0-9_\-.]*):)?\[(?[a-zA-Z0-9]+) TO (?[a-zA-Z0-9]+)\])/Aus' => Tokenizer::TOKEN_TERM, + '/(?(?:(?[a-zA-Z_][a-zA-Z0-9_\-.]*):)?(?[\[\{])(?[a-zA-Z0-9]+) TO (?[a-zA-Z0-9]+)[\]\}])/Aus' => Tokenizer::TOKEN_TERM, '/(?(?:(?[a-zA-Z_][a-zA-Z0-9_\-.]*):)?(?(?:\\\\\\\\|\\\\ |\\\\\(|\\\\\)|\\\\"|[^"()\s])+?))(?:(? Tokenizer::TOKEN_TERM, ]; @@ -50,12 +50,13 @@ protected function createTermToken($position, array $data) $lexeme = $data['lexeme']; switch (true) { - case isset($data['rangeFrom']) && isset($data['rangeTo']): + case isset($data['rangeStartSymbol']): return new Range( $lexeme, $position, $data['domain'], - $data['rangeFrom'], $data['rangeTo'] + $data['rangeFrom'], $data['rangeTo'], + Range::getTypeByStart($data['rangeStartSymbol']) ); case isset($data['word']): return new Word( diff --git a/lib/Languages/Galach/Values/Token/Range.php b/lib/Languages/Galach/Values/Token/Range.php index 171518e..43bec37 100644 --- a/lib/Languages/Galach/Values/Token/Range.php +++ b/lib/Languages/Galach/Values/Token/Range.php @@ -12,6 +12,9 @@ */ final class Range extends Token { + const TYPE_INCLUSIVE = 'inclusive'; + const TYPE_EXCLUSIVE = 'exclusive'; + /** * Holds domain string. * @@ -29,19 +32,50 @@ final class Range extends Token */ public $rangeTo; + /** + * @var string + */ + public $type; + /** * @param string $lexeme - * @param int $position + * @param int $position * @param string $domain * @param string $rangeFrom * @param string $rangeTo + * @param string $type */ - public function __construct($lexeme, $position, $domain, $rangeFrom, $rangeTo) + public function __construct($lexeme, $position, $domain, $rangeFrom, $rangeTo, $type) { + if (!in_array($type, [self::TYPE_EXCLUSIVE, self::TYPE_INCLUSIVE])) { + throw new \InvalidArgumentException(sprintf('Invalid range type: %s', $type)); + } + parent::__construct(Tokenizer::TOKEN_TERM, $lexeme, $position); $this->domain = $domain; $this->rangeFrom = $rangeFrom; $this->rangeTo = $rangeTo; + $this->type = $type; + } + + /** + * Returns the range type, given the starting symbol. + * + * @param string $startSymbol the start symbol, either '[' or '{' + * + * @return string + */ + public static function getTypeByStart($startSymbol) + { + if ('[' === $startSymbol) { + return self::TYPE_INCLUSIVE; + } + + if ('{' === $startSymbol) { + return self::TYPE_EXCLUSIVE; + } + + throw new \InvalidArgumentException(sprintf('Invalid range start symbol: %s', $startSymbol)); } } diff --git a/tests/Galach/Tokenizer/FullTokenizerTest.php b/tests/Galach/Tokenizer/FullTokenizerTest.php index c050e69..e523f7d 100644 --- a/tests/Galach/Tokenizer/FullTokenizerTest.php +++ b/tests/Galach/Tokenizer/FullTokenizerTest.php @@ -116,7 +116,13 @@ public function providerForTestTokenize() [ '[a TO b]', [ - new RangeToken('[a TO b]', 0, '', 'a', 'b'), + new RangeToken('[a TO b]', 0, '', 'a', 'b', 'inclusive'), + ], + ], + [ + '{a TO b}', + [ + new RangeToken('{a TO b}', 0, '', 'a', 'b', 'exclusive'), ], ], [ diff --git a/tests/Galach/Tokenizer/TextTokenizerTest.php b/tests/Galach/Tokenizer/TextTokenizerTest.php index 60c1c88..8286f23 100644 --- a/tests/Galach/Tokenizer/TextTokenizerTest.php +++ b/tests/Galach/Tokenizer/TextTokenizerTest.php @@ -102,6 +102,13 @@ public static function setUpBeforeClass() new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 5), new WordToken('b]', 6, '', 'b]'), ], + '{a TO b}' => [ + new WordToken('{a', 0, '', '{a'), + new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 2), + new WordToken('TO', 3, '', 'TO'), + new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 5), + new WordToken('b}', 6, '', 'b}'), + ], 'domain:domain:' => [ new WordToken('domain:domain:', 0, '', 'domain:domain:'), ], diff --git a/tests/Galach/Values/Token/RangeTest.php b/tests/Galach/Values/Token/RangeTest.php new file mode 100644 index 0000000..d27a97b --- /dev/null +++ b/tests/Galach/Values/Token/RangeTest.php @@ -0,0 +1,65 @@ +expectException(\InvalidArgumentException::class); + Range::getTypeByStart($startSymbol); + } + + public function successfulStartSymbolDataprovider() + { + return [ + ['inclusive', '['], + ['exclusive', '{'], + ]; + } + + /** + * @dataProvider successfulStartSymbolDataprovider + * @param string $expectedType + * @param string $startSymbol + */ + public function testGetTypeByStartSucceeds($expectedType, $startSymbol) + { + $this->assertSame($expectedType, Range::getTypeByStart($startSymbol)); + } + + public function failingTypeDataprovider() + { + return [ + [''], + [null], + ['other'], + ]; + } + + /** + * @dataProvider failingTypeDataprovider + * @param string $type + */ + public function testConstructorFailsWrongType($type) + { + $this->expectException(\InvalidArgumentException::class); + new Range('[a TO b]', 0, '', 'a', 'b', $type); + } +} From a7c3867a995d8658d9573dba98759d221ad004a4 Mon Sep 17 00:00:00 2001 From: thePanz Date: Thu, 5 Apr 2018 19:57:43 +0200 Subject: [PATCH 3/6] Add Range node generator and tests --- .../Galach/Generators/Native/Range.php | 50 +++++++++++ tests/Galach/Generators/Native/RangeTest.php | 90 +++++++++++++++++++ 2 files changed, 140 insertions(+) create mode 100644 lib/Languages/Galach/Generators/Native/Range.php create mode 100644 tests/Galach/Generators/Native/RangeTest.php diff --git a/lib/Languages/Galach/Generators/Native/Range.php b/lib/Languages/Galach/Generators/Native/Range.php new file mode 100644 index 0000000..3954c81 --- /dev/null +++ b/lib/Languages/Galach/Generators/Native/Range.php @@ -0,0 +1,50 @@ +token instanceof RangeToken; + } + + public function visit(Node $node, Visitor $subVisitor = null, $options = null) + { + if (!$node instanceof Term) { + throw new LogicException( + 'Implementation accepts instance of Term Node' + ); + } + + $token = $node->token; + + if (!$token instanceof RangeToken) { + throw new LogicException( + 'Implementation accepts instance of Range Token' + ); + } + + $domainPrefix = '' === $token->domain ? '' : "{$token->domain}:"; + + switch ($token->type) { + case RangeToken::TYPE_INCLUSIVE: + return $domainPrefix . '[' . $token->rangeFrom . ' TO ' . $token->rangeTo . ']'; + + case RangeToken::TYPE_EXCLUSIVE: + return $domainPrefix . '{' . $token->rangeFrom . ' TO ' . $token->rangeTo . '}'; + + default: + throw new LogicException(sprintf('Range type %s is not supported', $token->type)); + } + } +} diff --git a/tests/Galach/Generators/Native/RangeTest.php b/tests/Galach/Generators/Native/RangeTest.php new file mode 100644 index 0000000..c5ada5a --- /dev/null +++ b/tests/Galach/Generators/Native/RangeTest.php @@ -0,0 +1,90 @@ +visitor = new Range(); + } + + public function acceptDataprovider() + { + return [ + [true, new Term(new RangeToken('[a TO b]', 0, '', 'a', 'b', 'inclusive'))], + [false, new Term(new Word('word', 0, '', 'a'))], + ]; + } + + /** + * @param bool $expected + * @param Node $token + * + * @dataProvider acceptDataprovider + */ + public function testAccepts($expected, $node) + { + $this->assertSame($expected, $this->visitor->accept($node)); + } + + public function visitDataprovider() + { + return [ + ['[a TO b]', new Term(new RangeToken('[a TO b]', 0, '', 'a', 'b', 'inclusive'))], + ['{a TO b}', new Term(new RangeToken('{a TO b}', 0, '', 'a', 'b', 'exclusive'))], + ]; + } + + /** + * @param string $expected + * @param Node $token + * + * @dataProvider visitDataprovider + */ + public function testVisit($expected, $node) + { + $this->assertSame($expected, $this->visitor->visit($node)); + } + + public function visitWrongNodeDataprovider() + { + return [ + [new Mandatory()], + [new Term(new Word('word', 0, '', 'a'))], + ]; + } + + /** + * @param string $expected + * @param Node $token + * + * @dataProvider visitWrongNodeDataprovider + */ + public function testVisitWrongNodeFails($node) + { + $this->expectException(\LogicException::class); + $this->visitor->visit($node); + } + + public function testVisitUnknownTypeFails() + { + $this->expectException(\LogicException::class); + $node = new Term(new RangeToken('{a TO b}', 0, '', 'a', 'b', 'unknown')); + $this->visitor->visit($node); + } +} From 771b4864cc954aa72db72c4fea3987a37f1a2d3c Mon Sep 17 00:00:00 2001 From: thePanz Date: Fri, 6 Apr 2018 10:14:05 +0200 Subject: [PATCH 4/6] Refactor range start/end symbol handling, allow asymmetric ranges --- .../Galach/Generators/Native/Range.php | 38 +++++++++++-- lib/Languages/Galach/TokenExtractor/Full.php | 23 +++++++- lib/Languages/Galach/Values/Token/Range.php | 39 ++++++------- tests/Galach/Generators/Native/RangeTest.php | 27 +++++++-- tests/Galach/Tokenizer/FullTokenizerTest.php | 16 +++++- tests/Galach/Tokenizer/TextTokenizerTest.php | 14 +++++ tests/Galach/Values/Token/RangeTest.php | 57 +++++-------------- 7 files changed, 135 insertions(+), 79 deletions(-) diff --git a/lib/Languages/Galach/Generators/Native/Range.php b/lib/Languages/Galach/Generators/Native/Range.php index 3954c81..73665f3 100644 --- a/lib/Languages/Galach/Generators/Native/Range.php +++ b/lib/Languages/Galach/Generators/Native/Range.php @@ -36,15 +36,45 @@ public function visit(Node $node, Visitor $subVisitor = null, $options = null) $domainPrefix = '' === $token->domain ? '' : "{$token->domain}:"; - switch ($token->type) { + return $domainPrefix. + $this->buildRangeStart($token). + ' TO '. + $this->buildRangeEnd($token); + } + + /** + * @param RangeToken $token + * @return string + */ + private function buildRangeStart($token) + { + switch ($token->startType) { + case RangeToken::TYPE_INCLUSIVE: + return '[' . $token->rangeFrom; + + case RangeToken::TYPE_EXCLUSIVE: + return '{' . $token->rangeFrom; + + default: + throw new LogicException(sprintf('Range start type %s is not supported', $token->startType)); + } + } + + /** + * @param RangeToken $token + * @return string + */ + private function buildRangeEnd($token) + { + switch ($token->endType) { case RangeToken::TYPE_INCLUSIVE: - return $domainPrefix . '[' . $token->rangeFrom . ' TO ' . $token->rangeTo . ']'; + return $token->rangeTo. ']'; case RangeToken::TYPE_EXCLUSIVE: - return $domainPrefix . '{' . $token->rangeFrom . ' TO ' . $token->rangeTo . '}'; + return $token->rangeTo. '}'; default: - throw new LogicException(sprintf('Range type %s is not supported', $token->type)); + throw new LogicException(sprintf('Range end type %s is not supported', $token->endType)); } } } diff --git a/lib/Languages/Galach/TokenExtractor/Full.php b/lib/Languages/Galach/TokenExtractor/Full.php index 3de88d7..f464885 100644 --- a/lib/Languages/Galach/TokenExtractor/Full.php +++ b/lib/Languages/Galach/TokenExtractor/Full.php @@ -36,7 +36,7 @@ final class Full extends TokenExtractor '/(?(?:(?(?[a-zA-Z0-9_][a-zA-Z0-9_\-.]*)))(?:[\s"()+!]|$)/Au' => Tokenizer::TOKEN_TERM, '/(?(?:(?(?[a-zA-Z0-9_][a-zA-Z0-9_\-.]*)))(?:[\s"()+!]|$)/Au' => Tokenizer::TOKEN_TERM, '/(?(?:(?[a-zA-Z_][a-zA-Z0-9_\-.]*):)?(?(?.*?)(?:(? Tokenizer::TOKEN_TERM, - '/(?(?:(?[a-zA-Z_][a-zA-Z0-9_\-.]*):)?(?[\[\{])(?[a-zA-Z0-9]+) TO (?[a-zA-Z0-9]+)[\]\}])/Aus' => Tokenizer::TOKEN_TERM, + '/(?(?:(?[a-zA-Z_][a-zA-Z0-9_\-.]*):)?(?[\[\{])(?[a-zA-Z0-9]+) TO (?[a-zA-Z0-9]+)(?[\]\}]))/Aus' => Tokenizer::TOKEN_TERM, '/(?(?:(?[a-zA-Z_][a-zA-Z0-9_\-.]*):)?(?(?:\\\\\\\\|\\\\ |\\\\\(|\\\\\)|\\\\"|[^"()\s])+?))(?:(? Tokenizer::TOKEN_TERM, ]; @@ -50,13 +50,14 @@ protected function createTermToken($position, array $data) $lexeme = $data['lexeme']; switch (true) { - case isset($data['rangeStartSymbol']): + case (isset($data['rangeStartSymbol']) && isset($data['rangeEndSymbol'])): return new Range( $lexeme, $position, $data['domain'], $data['rangeFrom'], $data['rangeTo'], - Range::getTypeByStart($data['rangeStartSymbol']) + $this->getRangeTypeBySymbol($data['rangeStartSymbol']), + $this->getRangeTypeBySymbol($data['rangeEndSymbol']) ); case isset($data['word']): return new Word( @@ -95,4 +96,20 @@ protected function createTermToken($position, array $data) throw new RuntimeException('Could not extract term token from the given data'); } + + /** + * Returns the range type, given the symbol. + * + * @param string $symbol the range start/end symbol + * + * @return string + */ + protected function getRangeTypeBySymbol($symbol) + { + if (in_array($symbol, ['{','}'], true)) { + return Range::TYPE_EXCLUSIVE; + } + + return Range::TYPE_INCLUSIVE; + } } diff --git a/lib/Languages/Galach/Values/Token/Range.php b/lib/Languages/Galach/Values/Token/Range.php index 43bec37..16f5503 100644 --- a/lib/Languages/Galach/Values/Token/Range.php +++ b/lib/Languages/Galach/Values/Token/Range.php @@ -35,7 +35,12 @@ final class Range extends Token /** * @var string */ - public $type; + public $startType; + + /** + * @var string + */ + public $endType; /** * @param string $lexeme @@ -43,39 +48,27 @@ final class Range extends Token * @param string $domain * @param string $rangeFrom * @param string $rangeTo - * @param string $type + * @param string $startType + * @param string $endType */ - public function __construct($lexeme, $position, $domain, $rangeFrom, $rangeTo, $type) + public function __construct($lexeme, $position, $domain, $rangeFrom, $rangeTo, $startType, $endType) { - if (!in_array($type, [self::TYPE_EXCLUSIVE, self::TYPE_INCLUSIVE])) { - throw new \InvalidArgumentException(sprintf('Invalid range type: %s', $type)); - } + $this->ensureValidType($startType); + $this->ensureValidType($endType); parent::__construct(Tokenizer::TOKEN_TERM, $lexeme, $position); $this->domain = $domain; $this->rangeFrom = $rangeFrom; $this->rangeTo = $rangeTo; - $this->type = $type; + $this->startType = $startType; + $this->endType = $endType; } - /** - * Returns the range type, given the starting symbol. - * - * @param string $startSymbol the start symbol, either '[' or '{' - * - * @return string - */ - public static function getTypeByStart($startSymbol) + private function ensureValidType($type) { - if ('[' === $startSymbol) { - return self::TYPE_INCLUSIVE; - } - - if ('{' === $startSymbol) { - return self::TYPE_EXCLUSIVE; + if (!in_array($type, [self::TYPE_EXCLUSIVE, self::TYPE_INCLUSIVE])) { + throw new \InvalidArgumentException(sprintf('Invalid range type: %s', $type)); } - - throw new \InvalidArgumentException(sprintf('Invalid range start symbol: %s', $startSymbol)); } } diff --git a/tests/Galach/Generators/Native/RangeTest.php b/tests/Galach/Generators/Native/RangeTest.php index c5ada5a..8df8426 100644 --- a/tests/Galach/Generators/Native/RangeTest.php +++ b/tests/Galach/Generators/Native/RangeTest.php @@ -26,7 +26,7 @@ protected function setUp() public function acceptDataprovider() { return [ - [true, new Term(new RangeToken('[a TO b]', 0, '', 'a', 'b', 'inclusive'))], + [true, new Term(new RangeToken('[a TO b]', 0, '', 'a', 'b', 'inclusive', 'inclusive'))], [false, new Term(new Word('word', 0, '', 'a'))], ]; } @@ -45,8 +45,10 @@ public function testAccepts($expected, $node) public function visitDataprovider() { return [ - ['[a TO b]', new Term(new RangeToken('[a TO b]', 0, '', 'a', 'b', 'inclusive'))], - ['{a TO b}', new Term(new RangeToken('{a TO b}', 0, '', 'a', 'b', 'exclusive'))], + ['[a TO b]', new Term(new RangeToken('[a TO b]', 0, '', 'a', 'b', 'inclusive', 'inclusive'))], + ['[a TO b}', new Term(new RangeToken('[a TO b}', 0, '', 'a', 'b', 'inclusive', 'exclusive'))], + ['{a TO b}', new Term(new RangeToken('{a TO b}', 0, '', 'a', 'b', 'exclusive', 'exclusive'))], + ['{a TO b]', new Term(new RangeToken('{a TO b]', 0, '', 'a', 'b', 'exclusive', 'inclusive'))], ]; } @@ -81,10 +83,25 @@ public function testVisitWrongNodeFails($node) $this->visitor->visit($node); } - public function testVisitUnknownTypeFails() + public function testVisitUnknownRangeStartTypeFails() { + $token = new RangeToken('{a TO b}', 0, '', 'a', 'b', 'inclusive', 'inclusive'); + $token->startType = 'unknown'; + $node = new Term($token); + + $this->expectException(\LogicException::class); + $this->expectExceptionMessage('Range start type unknown is not supported'); + $this->visitor->visit($node); + } + + public function testVisitUnknownRangeEndTypeFails() + { + $token = new RangeToken('{a TO b}', 0, '', 'a', 'b', 'inclusive', 'inclusive'); + $token->endType = 'unknown'; + $node = new Term($token); + $this->expectException(\LogicException::class); - $node = new Term(new RangeToken('{a TO b}', 0, '', 'a', 'b', 'unknown')); + $this->expectExceptionMessage('Range end type unknown is not supported'); $this->visitor->visit($node); } } diff --git a/tests/Galach/Tokenizer/FullTokenizerTest.php b/tests/Galach/Tokenizer/FullTokenizerTest.php index e523f7d..1045328 100644 --- a/tests/Galach/Tokenizer/FullTokenizerTest.php +++ b/tests/Galach/Tokenizer/FullTokenizerTest.php @@ -116,13 +116,25 @@ public function providerForTestTokenize() [ '[a TO b]', [ - new RangeToken('[a TO b]', 0, '', 'a', 'b', 'inclusive'), + new RangeToken('[a TO b]', 0, '', 'a', 'b', 'inclusive', 'inclusive'), + ], + ], + [ + '[a TO b}', + [ + new RangeToken('[a TO b}', 0, '', 'a', 'b', 'inclusive', 'exclusive'), ], ], [ '{a TO b}', [ - new RangeToken('{a TO b}', 0, '', 'a', 'b', 'exclusive'), + new RangeToken('{a TO b}', 0, '', 'a', 'b', 'exclusive', 'exclusive'), + ], + ], + [ + '{a TO b]', + [ + new RangeToken('{a TO b]', 0, '', 'a', 'b', 'exclusive', 'inclusive'), ], ], [ diff --git a/tests/Galach/Tokenizer/TextTokenizerTest.php b/tests/Galach/Tokenizer/TextTokenizerTest.php index 8286f23..1c9380a 100644 --- a/tests/Galach/Tokenizer/TextTokenizerTest.php +++ b/tests/Galach/Tokenizer/TextTokenizerTest.php @@ -102,6 +102,13 @@ public static function setUpBeforeClass() new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 5), new WordToken('b]', 6, '', 'b]'), ], + '[a TO b}' => [ + new WordToken('[a', 0, '', '[a'), + new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 2), + new WordToken('TO', 3, '', 'TO'), + new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 5), + new WordToken('b}', 6, '', 'b}'), + ], '{a TO b}' => [ new WordToken('{a', 0, '', '{a'), new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 2), @@ -109,6 +116,13 @@ public static function setUpBeforeClass() new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 5), new WordToken('b}', 6, '', 'b}'), ], + '{a TO b]' => [ + new WordToken('{a', 0, '', '{a'), + new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 2), + new WordToken('TO', 3, '', 'TO'), + new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 5), + new WordToken('b]', 6, '', 'b]'), + ], 'domain:domain:' => [ new WordToken('domain:domain:', 0, '', 'domain:domain:'), ], diff --git a/tests/Galach/Values/Token/RangeTest.php b/tests/Galach/Values/Token/RangeTest.php index d27a97b..b6b9bfe 100644 --- a/tests/Galach/Values/Token/RangeTest.php +++ b/tests/Galach/Values/Token/RangeTest.php @@ -7,49 +7,22 @@ class RangeTest extends TestCase { - public function failingStartSymbolDataprovider() - { - return [ - [''], - ['/'], - ['('], - ]; - } - - /** - * @dataProvider failingStartSymbolDataprovider - * @param string $startSymbol - */ - public function testGetTypeByStartFails($startSymbol) - { - $this->expectException(\InvalidArgumentException::class); - Range::getTypeByStart($startSymbol); - } - - public function successfulStartSymbolDataprovider() - { - return [ - ['inclusive', '['], - ['exclusive', '{'], - ]; - } - - /** - * @dataProvider successfulStartSymbolDataprovider - * @param string $expectedType - * @param string $startSymbol - */ - public function testGetTypeByStartSucceeds($expectedType, $startSymbol) - { - $this->assertSame($expectedType, Range::getTypeByStart($startSymbol)); - } - public function failingTypeDataprovider() { return [ - [''], - [null], - ['other'], + ['', 'inclusive'], + ['', 'exclusive'], + ['inclusive', ''], + ['exclusive', ''], + [null, null], + ['other', 'inclusive'], + ['other', 'exclusive'], + ['inclusive','other'], + ['exclusive','other'], + ['inclusive', null], + ['exclusive', null], + [null, 'inclusive'], + [null, 'exclusive'], ]; } @@ -57,9 +30,9 @@ public function failingTypeDataprovider() * @dataProvider failingTypeDataprovider * @param string $type */ - public function testConstructorFailsWrongType($type) + public function testConstructorFailsWrongType($startType, $endType) { $this->expectException(\InvalidArgumentException::class); - new Range('[a TO b]', 0, '', 'a', 'b', $type); + new Range('[a TO b]', 0, '', 'a', 'b', $startType, $endType); } } From f3d3f267d9753a48448fe9570d2494bd724969e4 Mon Sep 17 00:00:00 2001 From: thePanz Date: Fri, 6 Apr 2018 10:38:55 +0200 Subject: [PATCH 5/6] Allow dates and * for ranges --- lib/Languages/Galach/TokenExtractor/Full.php | 2 +- tests/Galach/Tokenizer/FullTokenizerTest.php | 18 +++++++++++++++++ tests/Galach/Tokenizer/TextTokenizerTest.php | 21 ++++++++++++++++++++ 3 files changed, 40 insertions(+), 1 deletion(-) diff --git a/lib/Languages/Galach/TokenExtractor/Full.php b/lib/Languages/Galach/TokenExtractor/Full.php index f464885..59a4899 100644 --- a/lib/Languages/Galach/TokenExtractor/Full.php +++ b/lib/Languages/Galach/TokenExtractor/Full.php @@ -36,7 +36,7 @@ final class Full extends TokenExtractor '/(?(?:(?(?[a-zA-Z0-9_][a-zA-Z0-9_\-.]*)))(?:[\s"()+!]|$)/Au' => Tokenizer::TOKEN_TERM, '/(?(?:(?(?[a-zA-Z0-9_][a-zA-Z0-9_\-.]*)))(?:[\s"()+!]|$)/Au' => Tokenizer::TOKEN_TERM, '/(?(?:(?[a-zA-Z_][a-zA-Z0-9_\-.]*):)?(?(?.*?)(?:(? Tokenizer::TOKEN_TERM, - '/(?(?:(?[a-zA-Z_][a-zA-Z0-9_\-.]*):)?(?[\[\{])(?[a-zA-Z0-9]+) TO (?[a-zA-Z0-9]+)(?[\]\}]))/Aus' => Tokenizer::TOKEN_TERM, + '/(?(?:(?[a-zA-Z_][a-zA-Z0-9_\-.]*):)?(?[\[\{])(?([a-zA-Z0-9-]+|\*)) TO (?([a-zA-Z0-9-]+|\*))(?[\]\}]))/Aus' => Tokenizer::TOKEN_TERM, '/(?(?:(?[a-zA-Z_][a-zA-Z0-9_\-.]*):)?(?(?:\\\\\\\\|\\\\ |\\\\\(|\\\\\)|\\\\"|[^"()\s])+?))(?:(? Tokenizer::TOKEN_TERM, ]; diff --git a/tests/Galach/Tokenizer/FullTokenizerTest.php b/tests/Galach/Tokenizer/FullTokenizerTest.php index 1045328..afb6d07 100644 --- a/tests/Galach/Tokenizer/FullTokenizerTest.php +++ b/tests/Galach/Tokenizer/FullTokenizerTest.php @@ -137,6 +137,24 @@ public function providerForTestTokenize() new RangeToken('{a TO b]', 0, '', 'a', 'b', 'exclusive', 'inclusive'), ], ], + [ + '[2017-01-01 TO 2017-01-05]', + [ + new RangeToken('[2017-01-01 TO 2017-01-05]', 0, '', '2017-01-01', '2017-01-05', 'inclusive', 'inclusive'), + ], + ], + [ + '[20 TO *]', + [ + new RangeToken('[20 TO *]', 0, '', '20', '*', 'inclusive', 'inclusive'), + ], + ], + [ + '[* TO 20]', + [ + new RangeToken('[* TO 20]', 0, '', '*', '20', 'inclusive', 'inclusive'), + ], + ], [ '"phrase"', [ diff --git a/tests/Galach/Tokenizer/TextTokenizerTest.php b/tests/Galach/Tokenizer/TextTokenizerTest.php index 1c9380a..73174a0 100644 --- a/tests/Galach/Tokenizer/TextTokenizerTest.php +++ b/tests/Galach/Tokenizer/TextTokenizerTest.php @@ -123,6 +123,27 @@ public static function setUpBeforeClass() new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 5), new WordToken('b]', 6, '', 'b]'), ], + '[2017-01-01 TO 2017-01-05]' => [ + new WordToken('[2017-01-01', 0, '', '[2017-01-01'), + new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 11), + new WordToken('TO', 12, '', 'TO'), + new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 14), + new WordToken('2017-01-05]', 15, '', '2017-01-05]'), + ], + '[20 TO *]' => [ + new WordToken('[20', 0, '', '[20'), + new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 3), + new WordToken('TO', 4, '', 'TO'), + new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 6), + new WordToken('*]', 7, '', '*]'), + ], + '[* TO 20]' => [ + new WordToken('[*', 0, '', '[*'), + new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 2), + new WordToken('TO', 3, '', 'TO'), + new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 5), + new WordToken('20]', 6, '', '20]'), + ], 'domain:domain:' => [ new WordToken('domain:domain:', 0, '', 'domain:domain:'), ], From e9ca9e8ab54b349ada4a03168e5914fdc5a5d8e0 Mon Sep 17 00:00:00 2001 From: thePanz Date: Fri, 6 Apr 2018 11:46:16 +0200 Subject: [PATCH 6/6] Initial support for ranges with quotes --- lib/Languages/Galach/TokenExtractor/Full.php | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/Languages/Galach/TokenExtractor/Full.php b/lib/Languages/Galach/TokenExtractor/Full.php index 59a4899..4e10304 100644 --- a/lib/Languages/Galach/TokenExtractor/Full.php +++ b/lib/Languages/Galach/TokenExtractor/Full.php @@ -36,7 +36,13 @@ final class Full extends TokenExtractor '/(?(?:(?(?[a-zA-Z0-9_][a-zA-Z0-9_\-.]*)))(?:[\s"()+!]|$)/Au' => Tokenizer::TOKEN_TERM, '/(?(?:(?(?[a-zA-Z0-9_][a-zA-Z0-9_\-.]*)))(?:[\s"()+!]|$)/Au' => Tokenizer::TOKEN_TERM, '/(?(?:(?[a-zA-Z_][a-zA-Z0-9_\-.]*):)?(?(?.*?)(?:(? Tokenizer::TOKEN_TERM, - '/(?(?:(?[a-zA-Z_][a-zA-Z0-9_\-.]*):)?(?[\[\{])(?([a-zA-Z0-9-]+|\*)) TO (?([a-zA-Z0-9-]+|\*))(?[\]\}]))/Aus' => Tokenizer::TOKEN_TERM, + // Handle of range + '/(?(?:(?[a-zA-Z_][a-zA-Z0-9_\-.]*):)?'. + '(?[\[\{])'. + '(?([a-zA-Z0-9_-]+|\*)|(?(?([a-zA-Z0-9_-]+|\*)|(?(?[\]\}]))/Aus' => Tokenizer::TOKEN_TERM, '/(?(?:(?[a-zA-Z_][a-zA-Z0-9_\-.]*):)?(?(?:\\\\\\\\|\\\\ |\\\\\(|\\\\\)|\\\\"|[^"()\s])+?))(?:(? Tokenizer::TOKEN_TERM, ];