This repository was archived by the owner on Oct 8, 2020. It is now read-only.

Commit 2055fe7 (merge of parents 386a790 and 2e66a09)

31 files changed (+22919, -621 lines)

README.md

Lines changed: 4 additions & 1 deletion
@@ -1,9 +1,12 @@
 # SANSA-Examples
+[![Build Status](https://ci.aksw.org/jenkins/job/SANSA%20Examples/job/develop/badge/icon)](https://ci.aksw.org/jenkins/job/SANSA%20Examples/job/develop/)
+[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
+[![Twitter](https://img.shields.io/twitter/follow/SANSA_Stack.svg?style=social)](https://twitter.com/SANSA_Stack)
+
 This directory contains code examples for various SANSA functionality.
 
 ### [sansa-examples-spark](https://github.com/SANSA-Stack/SANSA-Examples/tree/master/sansa-examples-spark)
 Contains the SANSA Examples for [Apache Spark](http://spark.apache.org/).
 
 ### [sansa-examples-flink](https://github.com/SANSA-Stack/SANSA-Examples/tree/master/sansa-examples-flink)
 Contains the SANSA Examples for [Apache Flink](http://flink.apache.org/).
-

pom.xml

Lines changed: 27 additions & 12 deletions
@@ -4,7 +4,7 @@
   <modelVersion>4.0.0</modelVersion>
   <groupId>net.sansa-stack</groupId>
   <artifactId>sansa-examples-parent_2.11</artifactId>
-  <version>2017-06</version>
+  <version>2017-12-SNAPSHOT</version>
   <packaging>pom</packaging>
   <name>SANSA-Examples - Parent</name>
   <description>SANSA examples</description>
@@ -56,12 +56,13 @@
     <maven.compiler.source>1.8</maven.compiler.source>
     <maven.compiler.target>1.8</maven.compiler.target>
     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
-    <scala.version>2.11.8</scala.version>
+    <scala.version>2.11.11</scala.version>
     <scala.binary.version>2.11</scala.binary.version>
-    <spark.version>2.1.1</spark.version>
-    <flink.version>1.3.0</flink.version>
-    <jena.version>3.1.1</jena.version>
-    <sansa.version>0.2.0</sansa.version>
+    <spark.version>2.2.1</spark.version>
+    <flink.version>1.4.0</flink.version>
+    <hadoop.version>2.7.0</hadoop.version>
+    <jena.version>3.5.0</jena.version>
+    <sansa.version>0.3.0</sansa.version>
   </properties>
 
   <dependencyManagement>
@@ -102,6 +103,23 @@
         <version>${flink.version}</version>
       </dependency>
 
+      <!-- Hadoop dependencies (mainly used for InputFormat definitions) -->
+      <dependency>
+        <groupId>org.apache.hadoop</groupId>
+        <artifactId>hadoop-mapreduce-client-core</artifactId>
+        <version>${hadoop.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.hadoop</groupId>
+        <artifactId>hadoop-common</artifactId>
+        <version>${hadoop.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.hadoop</groupId>
+        <artifactId>hadoop-streaming</artifactId>
+        <version>${hadoop.version}</version>
+      </dependency>
+
       <!-- Apache JENA 3.x -->
       <dependency>
         <groupId>org.apache.jena</groupId>
@@ -142,11 +160,8 @@
       </dependency>
 
       <!-- Guava -->
-      <dependency>
-        <groupId>com.google.guava</groupId>
-        <artifactId>guava</artifactId>
-        <version>19.0</version>
-      </dependency>
+      <!-- <dependency> <groupId>com.google.guava</groupId> <artifactId>guava</artifactId>
+        <version>19.0</version> </dependency> -->
 
       <!-- RDF Layer -->
       <dependency>
@@ -157,7 +172,7 @@
 
       <dependency>
         <groupId>${project.groupId}</groupId>
-        <artifactId>sansa-rdf-flink_${scala.binary.version}</artifactId>
+        <artifactId>sansa-rdf-flink-core_${scala.binary.version}</artifactId>
         <version>${sansa.version}</version>
       </dependency>
       <!-- OWL Layer -->

sansa-examples-flink/pom.xml

Lines changed: 18 additions & 8 deletions
@@ -5,7 +5,7 @@
   <parent>
     <artifactId>sansa-examples-parent_2.11</artifactId>
     <groupId>net.sansa-stack</groupId>
-    <version>2017-06</version>
+    <version>2017-12-SNAPSHOT</version>
   </parent>
   <artifactId>sansa-examples-flink_2.11</artifactId>
   <name>SANSA Examples - Apache Flink</name>
@@ -15,13 +15,23 @@
     <!-- SANSA RDF -->
     <dependency>
       <groupId>${project.groupId}</groupId>
-      <artifactId>sansa-rdf-flink_${scala.binary.version}</artifactId>
+      <artifactId>sansa-rdf-flink-core_${scala.binary.version}</artifactId>
     </dependency>
 
     <!-- SANSA OWL -->
     <dependency>
       <groupId>${project.groupId}</groupId>
       <artifactId>sansa-owl-flink_${scala.binary.version}</artifactId>
+      <exclusions>
+        <exclusion>
+          <groupId>org.mortbay.jetty</groupId>
+          <artifactId>jetty</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.mortbay.jetty</groupId>
+          <artifactId>jetty-util</artifactId>
+        </exclusion>
+      </exclusions>
     </dependency>
 
     <!-- SANSA Inference -->
@@ -33,16 +43,15 @@
     <!-- SANSA Query -->
 
     <!-- SANSA ML -->
-    <dependency>
-      <groupId>${project.groupId}</groupId>
-      <artifactId>sansa-ml-flink_${scala.binary.version}</artifactId>
-    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>sansa-ml-flink_${scala.binary.version}</artifactId>
+    </dependency>
 
     <!-- Scala -->
     <dependency>
       <groupId>org.scala-lang</groupId>
       <artifactId>scala-library</artifactId>
-      <version>${scala.version}</version>
     </dependency>
 
     <!-- Apache Flink -->
@@ -75,7 +84,6 @@
     <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
-      <version>4.11</version>
       <scope>test</scope>
     </dependency>
     <dependency>
@@ -93,6 +101,8 @@
   </dependencies>
 
   <build>
+    <sourceDirectory>src/main/scala</sourceDirectory>
+    <testSourceDirectory>src/test/scala</testSourceDirectory>
     <plugins>
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
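
The dependencies above wire the Flink examples module to the SANSA RDF, OWL, ML, and inference layers. As a bridge to the source change below, here is a minimal sketch of the pipeline those layers enable; it uses only calls that appear in the RDFGraphInference diff that follows, and the input path and local execution environment are assumptions for illustration, not part of the commit.

import java.net.URI

import org.apache.flink.api.scala.ExecutionEnvironment

import net.sansa_stack.inference.flink.data.RDFGraphLoader
import net.sansa_stack.inference.flink.forwardchaining.ForwardRuleReasonerRDFS

object InferencePipelineSketch {
  def main(args: Array[String]): Unit = {
    val env = ExecutionEnvironment.getExecutionEnvironment
    // load triples from disk (N-Triples), as in the example program below
    val graph = RDFGraphLoader.loadFromDisk(Seq(URI.create("/tmp/input.nt")), env)
    // forward-chain the RDFS entailment rules over the graph
    val inferred = new ForwardRuleReasonerRDFS(env).apply(graph)
    println(s"|G_inf| = ${inferred.size()}")
    env.execute("RDF Graph Inference (RDFS)")
  }
}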
sansa-examples-flink/src/main/scala/net/sansa_stack/examples/flink/inference/RDFGraphInference.scala

Lines changed: 130 additions & 38 deletions
@@ -1,70 +1,162 @@
 package net.sansa_stack.examples.flink.inference
 
-import java.io.File
+import java.io.{ File, FileInputStream }
+import java.net.URI
+import java.util.Properties
 
-import scala.collection.mutable
+import scala.io.Source
+
+import com.typesafe.config.ConfigFactory
+import org.apache.flink.api.java.utils.ParameterTool
 import org.apache.flink.api.scala.ExecutionEnvironment
+import org.apache.flink.configuration.Configuration
+import org.apache.flink.runtime.webmonitor.WebMonitorUtils
+
+import net.sansa_stack.inference.flink.data.{ RDFGraphLoader, RDFGraphWriter }
+import net.sansa_stack.inference.flink.forwardchaining.{
+  ForwardRuleReasonerOWLHorst,
+  ForwardRuleReasonerRDFS
+}
 import net.sansa_stack.inference.rules.ReasoningProfile._
-import net.sansa_stack.inference.flink.data.RDFGraphLoader
-import net.sansa_stack.inference.flink.forwardchaining.{ForwardRuleReasonerOWLHorst, ForwardRuleReasonerRDFS}
-import net.sansa_stack.inference.rules.ReasoningProfile
-import net.sansa_stack.inference.flink.data.RDFGraphWriter
+import net.sansa_stack.inference.rules.{ RDFSLevel, ReasoningProfile }
 
 object RDFGraphInference {
 
-  def main(args: Array[String]) {
-    if (args.length < 3) {
-      System.err.println(
-        "Usage: RDFGraphInference <input> <output> <reasoner")
-      System.err.println("Supported 'reasoner' as follows:")
-      System.err.println("  rdfs        Forward Rule Reasoner RDFS")
-      System.err.println("  owl-horst   Forward Rule Reasoner OWL Horst")
-      System.exit(1)
+  def main(args: Array[String]) {
+    parser.parse(args, Config()) match {
+      case Some(config) =>
+        run(
+          args,
+          config.in,
+          config.out,
+          config.profile,
+          config.writeToSingleFile,
+          config.sortedOutput,
+          config.propertiesFile,
+          config.jobName)
+      case None =>
+        println(parser.usage)
     }
-    val input = args(0)
-    val output = args(1)
-    val argprofile = args(2)
+  }
 
-    val profile = argprofile match {
-      case "rdfs"      => ReasoningProfile.RDFS
-      case "owl-horst" => ReasoningProfile.OWL_HORST
+  def run(
+    args: Array[String],
+    input: Seq[URI],
+    output: URI,
+    profile: ReasoningProfile,
+    writeToSingleFile: Boolean,
+    sortedOutput: Boolean,
+    propertiesFile: File,
+    jobName: String): Unit = {
 
-    }
-    val optionsList = args.drop(3).map { arg =>
-      arg.dropWhile(_ == '-').split('=') match {
-        case Array(opt, v) => (opt -> v)
-        case _             => throw new IllegalArgumentException("Invalid argument: " + arg)
-      }
-    }
-    val options = mutable.Map(optionsList: _*)
+    // read reasoner optimization properties
+    val reasonerConf =
+      if (propertiesFile != null) ConfigFactory.parseFile(propertiesFile)
+      else ConfigFactory.load("reasoner")
+
+    // get params
+    val params: ParameterTool = ParameterTool.fromArgs(args)
 
-    options.foreach {
-      case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt)
-    }
     println("======================================")
     println("|        RDF Graph Inference         |")
     println("======================================")
 
+    val conf = new Configuration()
+    conf.setInteger("taskmanager.network.numberOfBuffers", 3000)
+
+    // set up the execution environment
     val env = ExecutionEnvironment.getExecutionEnvironment
+    env.getConfig.disableSysoutLogging()
+
+    // make parameters available in the web interface
+    env.getConfig.setGlobalJobParameters(params)
 
     // load triples from disk
-    val graph = RDFGraphLoader.loadFromFile(new File(input).getAbsolutePath, env)
-    println(s"|G|=${graph.size()}")
+    val graph = RDFGraphLoader.loadFromDisk(input, env)
+    println(s"|G| = ${graph.size()}")
 
     // create reasoner
    val reasoner = profile match {
-      case RDFS => new ForwardRuleReasonerRDFS(env)
+      case RDFS | RDFS_SIMPLE =>
+        val r = new ForwardRuleReasonerRDFS(env)
+        r.useSchemaBroadCasting = reasonerConf.getBoolean("reasoner.rdfs.schema.broadcast")
+        r.extractSchemaTriplesInAdvance =
+          reasonerConf.getBoolean("reasoner.rdfs.schema.extractTriplesInAdvance")
+        if (profile == RDFS_SIMPLE) r.level = RDFSLevel.SIMPLE
+        r
       case OWL_HORST => new ForwardRuleReasonerOWLHorst(env)
     }
 
     // compute inferred graph
     val inferredGraph = reasoner.apply(graph)
-    println(s"|G_inferred|=${inferredGraph.size()}")
+    println(s"|G_inf| = ${inferredGraph.size()}")
+
+    val jn = if (jobName.isEmpty) s"RDF Graph Inference ($profile)" else jobName
+
+    // run the program
+    env.execute(jn)
+  }
+
+  // the config object
+  case class Config(
+    in: Seq[URI] = Seq(),
+    out: URI = new URI("."),
+    profile: ReasoningProfile = ReasoningProfile.RDFS,
+    writeToSingleFile: Boolean = false,
+    sortedOutput: Boolean = false,
+    propertiesFile: File = null,
+    jobName: String = "") // new File(getClass.getResource("reasoner.properties").toURI)
+
+  // read ReasoningProfile enum
+  implicit val profilesRead: scopt.Read[ReasoningProfile.Value] =
+    scopt.Read.reads(ReasoningProfile forName _.toLowerCase())
+
+  // the CLI parser
+  val parser = new scopt.OptionParser[Config]("RDFGraphMaterializer") {
+    head("RDFGraphMaterializer", "0.1.0")
+
+    opt[Seq[URI]]('i', "input")
+      .required()
+      .valueName("<path>")
+      .action((x, c) => c.copy(in = x))
+      .text("path to file or directory that contains the input files (in N-Triple format)")
+
+    opt[URI]('o', "out")
+      .required()
+      .valueName("<directory>")
+      .action((x, c) => c.copy(out = x))
+      .text("the output directory")
+
+    opt[Unit]("single-file")
+      .optional()
+      .action((_, c) => c.copy(writeToSingleFile = true))
+      .text("write the output to a single file in the output directory")
+
+    opt[Unit]("sorted")
+      .optional()
+      .action((_, c) => c.copy(sortedOutput = true))
+      .text("sorted output of the triples (per file)")
+
+    opt[ReasoningProfile]('p', "profile")
+      .required()
+      .valueName("{rdfs | rdfs-simple | owl-horst}")
+      .action((x, c) => c.copy(profile = x))
+      .text("the reasoning profile")
+
+    opt[File]('p', "prop")
+      .optional()
+      .valueName("<path_to_properties_file>")
+      .action((x, c) => c.copy(propertiesFile = x))
+      .text("the (optional) properties file which allows some more advanced options")
 
-    // write triples to disk
-    RDFGraphWriter.writeToFile(inferredGraph, new File(output).getAbsolutePath)
+    opt[String]('j', "jobName")
+      .optional()
+      .valueName("<name_of_the_Flink_job>")
+      .action((x, c) => c.copy(jobName = x))
+      .text("the name of the Flink job that occurs also in the Web-UI")
 
-    env.execute(s"RDF Graph Inference ($profile)")
+    help("help").text("prints this usage text")
 
   }
+  parser.showUsageOnError
 }
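
For reference, a sketch of how the reworked scopt-based CLI might be exercised programmatically, e.g. from a quick local test; the paths are hypothetical and this demo object is not part of the commit. The long option names are used here because the diff registers both "profile" and "prop" under the same short option 'p'. On a cluster the same flags would be passed to the packaged jar.

object RDFGraphInferenceCliDemo {
  def main(args: Array[String]): Unit = {
    // drives the parser exactly as a command line would
    net.sansa_stack.examples.flink.inference.RDFGraphInference.main(Array(
      "--input", "/tmp/input.nt",  // -i: file or directory with N-Triples input (hypothetical path)
      "--out", "/tmp/inferred",    // -o: output directory (hypothetical path)
      "--profile", "rdfs"))        // rdfs | rdfs-simple | owl-horst
  }
}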
