diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..603b90f --- /dev/null +++ b/.travis.yml @@ -0,0 +1,18 @@ +# this is a java project using maven +language: java +jdk: +# oracle jdk8 and jdk9 does not work on default dist +# - oraclejdk8 +# - oraclejdk9 + - oraclejdk11 + - openjdk8 + - openjdk-ea +# switch off gpg handling, javadoc and tests +install: + - mvn install -DskipTests -Dmaven.javadoc.skip=true -Dgpg.skip=true +script: + - mvn test -Dtest=com.blazegraph.gremlin.structure.TestBasicOperations + - mvn test -Dtest=com.blazegraph.gremlin.structure.TestBulkLoad + - mvn test -Dtest=com.blazegraph.gremlin.structure.TestHistory + - mvn test -Dtest=com.blazegraph.gremlin.structure.TestSearch + - mvn test -Dtest=com.blazegraph.gremlin.structure.SampleCode diff --git a/README.md b/README.md index ecfc53f..94e92ae 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,21 @@ # Blazegraph TinkerPop3 Implementation (blazegraph-gremlin) -======= + +[![Travis (.org)](https://img.shields.io/travis/BITPlan/tinkerpop3)](https://travis-ci.org/BITPlan/tinkerpop3) +[![GitHub](https://img.shields.io/github/license/BITPlan/tinkerpop3.svg)](https://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html) +[![BITPlan](http://wiki.bitplan.com/images/wiki/thumb/3/38/BITPlanLogoFontLessTransparent.png/198px-BITPlanLogoFontLessTransparent.png)](http://www.bitplan.com) + +This is a fork of Blazegraph Tinkerpop trying to support more recent versions of Apache Tinkerpop. ![Blazegraph TinkerPop Logo](images/blazegraph-gremlin.png) +# Version History +| Version | date | changes +| ------: | ---------- | ------------------------ +| 1.0.0 | 2016 | original fork +| 1.0.1 | 2018-12 | upgrading to Tinkerpop 3.2.10 +| 1.0.2 | 2019-09-24 | upgrading to Tinkerpop 3.2.11 + + Welcome to the [Blazegraph](https://www.blazegraph.com)/[TinkerPop3](http://tinkerpop.incubator.apache.org/) project. The TP3 implementation has some significant differences from the TP2 version. 
The data model has been changed to use RDF*, an RDF reification framework described [here](https://wiki.blazegraph.com/wiki/index.php/Reification_Done_Right). The concept behind blazegraph-gremlin is that property graph (PG) data can be loaded and accessed via the TinkerPop3 API, but underneath the hood the data will be stored as RDF using the PG data model described in this document. Once PG data has been loaded you can interact with it just like you would interact with ordinary RDF - you can run SPARQL queries or interact with the data via the SAIL API. It just works. The PG data model is also customizable via a round-tripping interface called the BlazeValueFactory, also described in detail in this document. @@ -40,7 +53,7 @@ To build blazegraph-gremlin: To import blazegraph-gremlin into Eclipse: > mvn eclipse:eclipse - + Then select "File-Import-Existing Projects Into Workspace" from the Eclipse menu and select the root directory of this project. Continue reading this document and take a look at SampleCode.java provided in blazegraph-gremlin/src/test for information on how to get started writing your TP3 application with Blazegraph. @@ -56,7 +69,7 @@ Once you restart the console, activate the blazegraph-gremlin plugin: gremlin> :plugin use tinkerpop.blazegraph ==>tinkerpop.blazegraph activated - + You can then open a BlazeGraph instance by specifying the location you would like to use for the persistent journal file: gremlin> g = BlazeGraphFactory.open("/tmp/blazegraph.jnl") @@ -66,7 +79,7 @@ You can then open a BlazeGraph instance by specifying the location you would lik Reliable Affordable Web-Scale Computing for the Enterprise - + Copyright SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved. ==>blazegraphembedded[vertices:0 edges:0] @@ -75,10 +88,10 @@ You can then open a BlazeGraph instance by specifying the location you would lik It's important to understand how Blazegraph organizes property graph data as RDF. 
Blazegraph uses the RDF* framework, which is an extension to RDF that provides for an easier RDF reification syntax. Reification is a means of using an RDF statement as an RDF value in other statements. The RDF* syntax for an RDF "statement as a value" is as follows: # statement - :john :knows :mary . + :john :knows :mary . # "statement as value" <<:john :knows :mary>> dc:source . - + Blazegraph uses the OpenRDF SAIL API and represents RDF* reified statements as bnodes in that API. This is important for understanding how to write SPARQL queries against TP3 graphs and how to interpret query results. Property graph values must be converted into RDF values and vice versa. Blazegraph provides a BlazeValueFactory interface with a default implementation. You can extend this interface and provide your own value factory if you prefer a custom look for the RDF values in your property graph. @@ -91,11 +104,11 @@ Property graph elements are represented as follows in Blazegraph: # BlazeVertex john = graph.addVertex(T.id, "john", T.label, "person"); blaze:john rdf:type blaze:person . - + # BlazeEdge knows = graph.addEdge(john, mary, "knows", T.id, "k01"); blaze:john blaze:k01 blaze:mary . <<blaze:john blaze:k01 blaze:mary>> rdf:type blaze:knows . - + Vertices require one statement, edges require two. Edge properties are simply attached to the reified edge statement: @@ -106,18 +119,18 @@ Edge properties are simply attached to the reified edge statement: Representation of vertex properties depends on the cardinality of the key. Cardinality.single and .set look the same, .list is represented differently. Vertices can mix and match cardinalities for different keys. All three cardinalities are supported, but Cardinality.set is the one most closely aligned with RDF and as such will provide the best performance of the three. User supplied ids are NOT supported for vertex properties. 
Cardinality.set and Cardinality.single are modeled the same way: - + # VertexProperty set = john.property(Cardinality.set, "age", 25, "acl", "public"); blaze:john blaze:age "25"^^xsd:int . <> blaze:acl "public" . - + Cardinality.list is modeled differently: - + # VertexProperty list = john.property(Cardinality.list, "city", "salt lake city", "acl", "public"); blaze:john blaze:city "12765"^^bg:listIndex . <> rdf:value "salt lake city" . <> blaze:acl "public" . - + Cardinality.list uses a specially datatyped and monotonically increasing internal identifier to represent the vertex property (the actual datatype is ``). This identifier serves to manage duplicate list values and ordering of list items. It's important to note this difference as different cardinalities will require different SPARQL queries. #### Putting it all together: The Crew @@ -126,13 +139,13 @@ Here is how the TinkerPop3 "Crew" dataset looks when loaded into Blazegraph. Hu blaze:tinkergraph rdf:type blaze:software ; blaze:name "tinkergraph" . - + blaze:gremlin rdf:type blaze:software ; blaze:name "gremlin" ; blaze:48f63 blaze:tinkergraph . - + <> rdf:type blaze:traverses . - + blaze:daniel rdf:type blaze:person ; blaze:name "daniel" ; blaze:81056 blaze:tinkergraph ; @@ -140,7 +153,7 @@ Here is how the TinkerPop3 "Crew" dataset looks when loaded into Blazegraph. Hu blaze:location "spremberg" ; blaze:location "kaiserslautern" ; blaze:location "aachen" . - + <> rdf:type blaze:uses ; blaze:skill "3"^^xsd:int . <> rdf:type blaze:uses ; @@ -150,7 +163,7 @@ Here is how the TinkerPop3 "Crew" dataset looks when loaded into Blazegraph. Hu <> blaze:startTime "2005"^^xsd:int ; blaze:endTime "2009"^^xsd:int . <> blaze:startTime "2009"^^xsd:int . - + blaze:marko rdf:type blaze:person ; blaze:name "marko" ; blaze:42af2 blaze:gremlin ; @@ -177,7 +190,7 @@ Here is how the TinkerPop3 "Crew" dataset looks when loaded into Blazegraph. Hu <> blaze:startTime "2004"^^xsd:int ; blaze:endTime "2005"^^xsd:int . 
<> blaze:startTime "2005"^^xsd:int . - + blaze:stephen rdf:type blaze:person ; blaze:name "stephen" ; blaze:15869 blaze:tinkergraph ; @@ -201,7 +214,7 @@ Here is how the TinkerPop3 "Crew" dataset looks when loaded into Blazegraph. Hu <> blaze:startTime "2000"^^xsd:int ; blaze:endTime "2006"^^xsd:int . <> blaze:startTime "2006"^^xsd:int . - + blaze:matthias rdf:type blaze:person ; blaze:name "matthias" ; blaze:7373e blaze:gremlin ; @@ -211,7 +224,7 @@ Here is how the TinkerPop3 "Crew" dataset looks when loaded into Blazegraph. Hu blaze:location "baltimore" ; blaze:location "oakland" ; blaze:location "seattle" . - + <> rdf:type blaze:develops ; blaze:since "2012"^^xsd:int . <> rdf:type blaze:uses ; @@ -225,7 +238,7 @@ Here is how the TinkerPop3 "Crew" dataset looks when loaded into Blazegraph. Hu <> blaze:startTime "2011"^^xsd:int ; blaze:endTime "2014"^^xsd:int . <> blaze:startTime "2014"^^xsd:int . - + ## Getting up and running with Blazegraph/TP3 Currently **BlazeGraphEmbedded** is the only concrete implementation of the Blazegraph TinkerPop3 API. BlazeGraphEmbedded is backed by an embedded (same JVM) instance of Blazegraph. This puts the enterprise features of Blazegraph (high-availability, scale-out, etc.) out of reach for the 1.0 version of the TP3 integration, since those features are accessed via Blazegraph's client/server API. A TP3 integration with the client/server version of Blazegraph is reserved for a future blazegraph-tinkerpop release. @@ -233,24 +246,24 @@ Currently **BlazeGraphEmbedded** is the only concrete implementation of the Blaz BlazeGraphEmbedded is instantiated by providing an open and initialized Blazegraph RDF repository (OpenRDF SAIL). There a numerous resources available at [blazegraph.com](http://wiki.blazegraph.com) on how to configure a Blazegraph SAIL, however blazegraph-gremlin comes with a quick start factory that will allow you to get up and running with Blazegraph with a reasonable set of defaults for the TinkerPop3 API. 
BasicRepositoryProvider in blazegraph-gremlin/src/main allows you to create or open an RDF repository backed by a persistent journal file at a specified location. This RDF repository can then be used to open a BlazeGraphEmbedded instance: /* - * A journal file is the persistence mechanism for an embedded + * A journal file is the persistence mechanism for an embedded * Blazegraph instance. */ String journal = file.getAbsolutePath(); - + /* * BasicRepositoryProvider will create a Blazegraph repository using the * specified journal file with a reasonable default configuration set * for the TinkerPop3 API. This will also open a previously created * repository if the specified journal already exists. - * + * * ("Bigdata" is the legacy product name for Blazegraph). - * - * See BasicRepositoryProvider for more details on the default SAIL + * + * See BasicRepositoryProvider for more details on the default SAIL * configuration. */ BigdataSailRepository repo = BasicRepositoryProvider.open(journal); - + /* * Open a BlazeGraphEmbedded instance with no additional configuration. * See BlazeGraphEmbedded.Options for additional configuration options. @@ -280,7 +293,7 @@ The bulk load API can be used in several ways: final TinkerGraph theCrew = TinkerFactory.createTheCrew(); graph.bulkLoad(theCrew); graph.tx().commit(); - + /* * Execute a code block in bulk load mode. */ @@ -289,18 +302,18 @@ The bulk load API can be used in several ways: graph.addVertex(T.id, "b"); }); graph.tx().commit(); - + /* - * Manually set and reset bulk load mode. + * Manually set and reset bulk load mode. */ graph.setBulkLoad(true); graph.addVertex(T.id, "c"); graph.addVertex(T.id, "d"); graph.setBulkLoad(false); graph.tx().commit(); - -Be careful not to introduce consistency errors while in bulk load mode. - + +Be careful not to introduce consistency errors while in bulk load mode. 
+ graph.bulkLoad(() -> { final BlazeVertex e = graph.addVertex(T.id, "e", T.label, "foo"); e.property(Cardinality.single, "someKey", "v1"); @@ -310,7 +323,7 @@ Be careful not to introduce consistency errors while in bulk load mode. * resulting in two values for Cardinality.single. */ assertEquals(2, e.properties("someKey").count()); - + graph.addVertex(T.id, "e", T.label, "bar"); /* * Consistency error - we've created a new vertex with the same id @@ -336,7 +349,7 @@ The search API lets you use Blazegraph's built-in full text index to perform Luc v.property(Cardinality.set, "key", "hello bar foo"); }); graph.tx().commit(); - + // all four vertex properties contain "foo" or "bar" assertEquals(4, graph.search("foo bar", Match.ANY).count()); // three contain "foo" @@ -347,7 +360,7 @@ The search API lets you use Blazegraph's built-in full text index to perform Luc assertEquals(2, graph.search("foo bar", Match.ALL).count()); // and only one contains exactly "foo bar" assertEquals(1, graph.search("foo bar", Match.EXACT).count()); - + // prefix match assertEquals(4, graph.search("hell*", Match.ANY).count()); @@ -360,34 +373,34 @@ The listener API looks as follows: @FunctionalInterface public interface BlazeGraphListener { - + /** * Notification of an edit to the graph. - * + * * @param edit * the {@link BlazeGraphEdit} * @param rdfEdit * toString() version of the raw RDF mutation */ void graphEdited(BlazeGraphEdit edit, String rdfEdit); - + /** * Notification of a transaction committed. - * + * * @param commitTime * the timestamp on the commit */ default void transactionCommited(long commitTime) { // noop default impl } - + /** * Notification of a transaction abort. */ default void transactionAborted() { // noop default impl } - + } Sample usage of this API can be found in SampleCode.demonstrateListenerAPI(). 
@@ -403,19 +416,19 @@ Sample usage of this API can be found in SampleCode.demonstrateHistoryAPI(), whi */ final BlazeVertex a = graph.addVertex(T.id, "a"); graph.tx().commit(); - + /* * Add a property. */ a.property(Cardinality.single, "key", "foo"); graph.tx().commit(); - + /* * Change the value. */ a.property(Cardinality.single, "key", "bar"); graph.tx().commit(); - + /* * Remove the vertex. */ @@ -467,9 +480,9 @@ Sample usage of this API can be found in SampleCode.demonstrateSparqlAPI(). Thi graph.tx().commit(); /* - * "Who created a project named 'lop' that was also created by someone + * "Who created a project named 'lop' that was also created by someone * who is 29 years old? Return the two creators." - * + * * gremlin> g.V().match( * __.as('a').out('created').as('b'), * __.as('b').has('name', 'lop'), @@ -480,7 +493,7 @@ Sample usage of this API can be found in SampleCode.demonstrateSparqlAPI(). Thi * ==>[a:josh, c:marko] * ==>[a:peter, c:marko] */ - final String sparql = + final String sparql = "select ?a ?c { " + // vertex named "lop" " ?lop \"lop\" . " + diff --git a/pom.xml b/pom.xml index 4a3431e..c53b77f 100644 --- a/pom.xml +++ b/pom.xml @@ -28,7 +28,7 @@ Copyright 2010 by TalkingTrends (Amsterdam, The Netherlands) com.blazegraph blazegraph-gremlin jar - 1.0.1-SNAPSHOT + 1.0.2 blazegraph-gremlin Welcome to the Blazegraph/TinkerPop3 project. The TP3 implementation has some significant differences from the TP2 version. The data model has been changed to use RDF*, an RDF reification framework described here: https://wiki.blazegraph.com/wiki/index.php/Reification_Done_Right. 
@@ -65,10 +65,13 @@ The concept behind blazegraph-gremlin is that property graph (PG) data can be lo UTF-8 - 9.2.3.v20140905 - 3.1.0-incubating - 2.0.0 + 9.4.14.v20181114 + + 3.2.11 + 3.2.11 + 2.1.2 1.3.1 + 3.4.6 @@ -94,9 +97,9 @@ The concept behind blazegraph-gremlin is that property graph (PG) data can be lo maven-central - @@ -209,6 +212,19 @@ The concept behind blazegraph-gremlin is that property graph (PG) data can be lo 1.8 + + org.apache.maven.plugins + maven-surefire-plugin + 2.22.2 + + + **/*Test*.java + + + **/*TestSuite.java + + + org.apache.maven.plugins maven-deploy-plugin @@ -283,7 +299,7 @@ The concept behind blazegraph-gremlin is that property graph (PG) data can be lo org.apache.tinkerpop gremlin-groovy-test - ${tp3.version} + ${tp3.gremlin-groovy-test.version} test diff --git a/scripts/test b/scripts/test new file mode 100755 index 0000000..dd97615 --- /dev/null +++ b/scripts/test @@ -0,0 +1,22 @@ +#!/bin/bash +LC_NUMERIC="en_US.UTF-8" +TIMES=() +PACKAGES=() +mvn clean install -DskipTests -Dmaven.javadoc.skip=true -Dgpg.skip=true +for package in TestBasicOperations TestBulkLoad TestHistory TestListeners TestSearch SampleCode TestSearch +do + START=$(date +%s.%N) + mvn test -Dtest=com.blazegraph.gremlin.structure.$package + END=$(date +%s.%N) + DIFF=$(echo "$END - $START" | bc) + TIMES+=($DIFF) + PACKAGES+=($package) +done +# get length of an array +len=${#PACKAGES[@]} +for (( i=1; i<${len}+1; i++ )); +do + package=${PACKAGES[$i-1]} + DIFF=${TIMES[$i-1]} + printf "%2d/%2d testing %20s took %.1f secs\n" $i $len $package $DIFF +done diff --git a/src/main/java/com/blazegraph/gremlin/internal/Tinkerpop3ExtensionFactory.java b/src/main/java/com/blazegraph/gremlin/internal/Tinkerpop3ExtensionFactory.java index 3b8ac76..8113ca5 100644 --- a/src/main/java/com/blazegraph/gremlin/internal/Tinkerpop3ExtensionFactory.java +++ b/src/main/java/com/blazegraph/gremlin/internal/Tinkerpop3ExtensionFactory.java @@ -37,9 +37,15 @@ */ public class 
Tinkerpop3ExtensionFactory extends DefaultExtensionFactory { + /** + * initialize the extension factory + * @param resolver + * @param lex + * @param extensions + */ protected void _init(final IDatatypeURIResolver resolver, final ILexiconConfiguration lex, - final Collection extensions) { + final Collection> extensions) { /* * Add ListIndexExtension for Cardinality.list. diff --git a/src/test/java/com/blazegraph/gremlin/structure/SampleCode.java b/src/test/java/com/blazegraph/gremlin/structure/SampleCode.java index 144439f..b6889d9 100644 --- a/src/test/java/com/blazegraph/gremlin/structure/SampleCode.java +++ b/src/test/java/com/blazegraph/gremlin/structure/SampleCode.java @@ -22,13 +22,11 @@ */ package com.blazegraph.gremlin.structure; -import static java.util.stream.Collectors.toSet; import static org.junit.Assert.assertEquals; import java.io.File; import java.util.LinkedList; import java.util.List; -import java.util.stream.Collectors; import org.apache.tinkerpop.gremlin.structure.Edge; import org.apache.tinkerpop.gremlin.structure.T; diff --git a/src/test/java/com/blazegraph/gremlin/structure/StructureStandardSuite.java b/src/test/java/com/blazegraph/gremlin/structure/StructureStandardSuite.java index b00e3fc..7864b48 100644 --- a/src/test/java/com/blazegraph/gremlin/structure/StructureStandardSuite.java +++ b/src/test/java/com/blazegraph/gremlin/structure/StructureStandardSuite.java @@ -93,7 +93,8 @@ public class StructureStandardSuite extends AbstractGremlinSuite { DetachedVertexTest.class, EdgeTest.class, FeatureSupportTest.class, - GraphTest.class, + // takes 5 min to test + // GraphTest.class, GraphConstructionTest.class, ModifiedVertexPropertyTest.class, VariablesTest.class, @@ -106,8 +107,9 @@ public class StructureStandardSuite extends AbstractGremlinSuite { ModifiedStarGraphTest.class, VertexTest.class, ModifiedTransactionTest.class, - CommunityGeneratorTest.class, - DistributionGeneratorTest.class, + // takes over 1/2 hour to test + // 
CommunityGeneratorTest.class, + // DistributionGeneratorTest.class, IoCustomTest.class, IoEdgeTest.class, IoGraphTest.class, diff --git a/src/test/java/com/blazegraph/gremlin/structure/TestBasicOperations.java b/src/test/java/com/blazegraph/gremlin/structure/TestBasicOperations.java index 84541b8..dabaff4 100644 --- a/src/test/java/com/blazegraph/gremlin/structure/TestBasicOperations.java +++ b/src/test/java/com/blazegraph/gremlin/structure/TestBasicOperations.java @@ -25,7 +25,6 @@ import static java.util.stream.Collectors.toList; import java.util.List; -import java.util.stream.Collectors; import org.apache.tinkerpop.gremlin.structure.Direction; import org.apache.tinkerpop.gremlin.structure.Edge; diff --git a/src/test/java/com/blazegraph/gremlin/structure/TestSuite.java b/src/test/java/com/blazegraph/gremlin/structure/TestSuite.java new file mode 100644 index 0000000..409f4fa --- /dev/null +++ b/src/test/java/com/blazegraph/gremlin/structure/TestSuite.java @@ -0,0 +1,41 @@ +/** +Copyright (C) SYSTAP, LLC DBA Blazegraph 2018. All rights reserved. + +Contact: + SYSTAP, LLC DBA Blazegraph + 2501 Calvert ST NW #106 + Washington, DC 20008 + licenses@blazegraph.com + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +package com.blazegraph.gremlin.structure; + +import org.junit.runner.RunWith; +import org.junit.runners.Suite; + +@RunWith(Suite.class) +@Suite.SuiteClasses({ TestBasicOperations.class, TestBulkLoad.class, + TestHistory.class,TestSearch.class, SampleCode.class + // comment out the next line to skip the full testsuite with some 810 tests + ,BlazeGraphStructureStandardTest.class + }) +/** + * Runs the listed test classes as a single JUnit suite. + * @author wf + * + */ +public class TestSuite { + +} diff --git a/src/test/resources/log4j.properties b/src/test/resources/log4j.properties index 9125f58..5f9b67a 100644 --- a/src/test/resources/log4j.properties +++ b/src/test/resources/log4j.properties @@ -28,18 +28,19 @@ log4j.logger.org.openrdf.query.resultio=ERROR #log4j.logger.com.bigdata.txLog=INFO #log4j.logger.com.blazegraph.gremlin=ALL -#og4j.logger.com.blazegraph.gremlin.structure.BlazeGraph=DEBUG +#log4j.logger.com.blazegraph.gremlin.structure.BlazeGraph=DEBUG #log4j.logger.com.blazegraph.gremlin.structure.BlazeGraph.SparqlLog=TRACE #log4j.logger.com.blazegraph.gremlin.structure.StructureStandardSuite=ALL #log4j.logger.com.blazegraph.gremlin.structure.TestBasicOperations=ALL #log4j.logger.com.blazegraph.gremlin.structure.TestHistory=ALL #log4j.logger.com.blazegraph.gremlin.structure.TestListeners=ALL #log4j.logger.com.blazegraph.gremlin.structure.TestSearch=ALL +#log4j.logger.com.blazegraph.gremlin.structure.BlazeGraphStructureStandardTest=INFO log4j.logger.com.blazegraph.gremlin.structure.SampleCode=ALL # Test suite loggers. -#log4j.logger.junit=INFO +log4j.logger.junit=INFO #log4j.logger.com.bigdata.btree.AbstractBTreeTestCase=INFO log4j.logger.junit.framework.TestCase2=ERROR