From 19e3e48a0f26383a8eb3606bc701c24d7994b891 Mon Sep 17 00:00:00 2001 From: ginesam Date: Thu, 5 Jul 2018 17:34:14 +0900 Subject: [PATCH 01/12] new readme --- README.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/README.md b/README.md index a68201fe..8ab65d6f 100644 --- a/README.md +++ b/README.md @@ -46,6 +46,7 @@ coqc coqlib.v ``` Our system assigns semantics to CCG structures. At the moment, we support C&C for English, and Jigg for Japanese. +If you are working with templates that require semantic tags, you will also need a universal semantic tagger. ### Installing [C&C parser](http://www.cl.cam.ac.uk/~sc609/candc-1.00.html) (for English) @@ -72,6 +73,16 @@ Simply do: The command above will download Jigg, its models, and create the file `ja/jigg_location.txt` where the path to Jigg is specified. That is all. +### Installing [semtagger](https://github.com/ginesam/semtagger) (optional) + +The tagger can be obtained directly from its repository: + +```bash +git clone https://github.com/ginesam/semtagger "/path/to/semtagger/" +``` + +Note that after downloading, you must follow the instructions given in order to train a tagging model. 
+ ## Using the Semantic Parser Let's assume that we have a file `sentences.txt` with one sentence per line, From a3bb1c3467b1792eb54f841a5970906caa7d38af Mon Sep 17 00:00:00 2001 From: ginesam Date: Thu, 26 Jul 2018 10:23:23 +0900 Subject: [PATCH 02/12] include semtags --- scripts/xml2conll.py | 35 +++++++++++++++++++++ scripts/xml_add_stag.py | 67 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+) create mode 100644 scripts/xml2conll.py create mode 100644 scripts/xml_add_stag.py diff --git a/scripts/xml2conll.py b/scripts/xml2conll.py new file mode 100644 index 00000000..6f668b1a --- /dev/null +++ b/scripts/xml2conll.py @@ -0,0 +1,35 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2018 Joan Gines i Ametlle +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import sys +import string +from lxml import etree + +# source xml file to extract tokens from +ifile = sys.argv[1] + +# navigate the tags contained in the xml tree +tree = etree.parse(ifile) +root = tree.getroot() +num_sents = 0 + +for sent in root.iter('sentence'): + if num_sents > 0: + print('') + for token in sent[0].findall('token'): + print(token.get('surf')) + num_sents = num_sents + 1 + diff --git a/scripts/xml_add_stag.py b/scripts/xml_add_stag.py new file mode 100644 index 00000000..1c0d5c19 --- /dev/null +++ b/scripts/xml_add_stag.py @@ -0,0 +1,67 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2018 Joan Gines i Ametlle +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import sys +import codecs +import string +from lxml import etree + +# source xml file to inject tokens to +ifile = sys.argv[1] + +# source file with tagged sentences +semfile = sys.argv[2] + +# output file +ofile = sys.argv[3] + +# extract semantic tags +stags = [[]] +sent_index = 0 + +for line in codecs.open(semfile, mode = 'r', errors = 'ignore', encoding = 'utf-8'): + line = line[:-1] + if line: + tag, _ = line.split('\t') + stags[sent_index].append(tag) + else: + stags.append([]) + sent_index = sent_index + 1 + +# navigate the tags contained in the xml tree +tree = etree.parse(ifile) +root = tree.getroot() +sent_index = -1 +word_index = -1 + +for sent in root.iter('sentence'): + sent_index = sent_index + 1 + + word_index = 0 + for token in sent[0].findall('token'): + token.set('stag', stags[sent_index][word_index]) + word_index = word_index + 1 + + word_index = 0 + for span in sent[1].findall('span'): + surf = span.get('surf') + if surf: + span.set('stag', stags[sent_index][word_index]) + word_index = word_index + 1 + +# write out result +tree.write(ofile) + From 5a6820fc82bb82d2ea93c76837ec39f07e26b511 Mon Sep 17 00:00:00 2001 From: ginesam Date: Thu, 26 Jul 2018 10:25:24 +0900 Subject: [PATCH 03/12] visualize sem-tags --- scripts/visualization_tools.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/scripts/visualization_tools.py b/scripts/visualization_tools.py index f9ff1573..cc5a29c9 100644 --- a/scripts/visualization_tools.py +++ b/scripts/visualization_tools.py @@ -36,6 +36,7 @@ kLexicalColor = 'Black' kEntityColor = 'Green' kPosColor = 'Green' +kStagColor = 'Fuchsia' # The full list of colors is: # Black Green Silver Lime Gray Olive White Maroon Red Purple Fuchsia Yellow Navy # Blue Teal Aqua @@ -97,6 +98,13 @@ def get_pos_mathml(pos): + pos \ + "\n" +def get_stag_mathml(stag): + return "" \ + + stag \ + + "\n" + def get_semantics_mathml(semantics): return "" + pos3_mathml if pos == '.': - mathml_str = 
get_fraction_mathml(category_mathml, surf_mathml, '0') + if stag: + mathml_stag_str = get_fraction_mathml(category_mathml, stag_mathml, '0') + mathml_str = get_fraction_mathml(mathml_stag_str, surf_mathml, '0') + else: + mathml_str = get_fraction_mathml(category_mathml, surf_mathml, '0') else: mathml_pos_str = get_fraction_mathml(category_mathml, pos_mathml, '0') - mathml_str = get_fraction_mathml(mathml_pos_str, surf_mathml, '0') + if stag: + mathml_stag_str = get_fraction_mathml(mathml_pos_str, stag_mathml, '0') + mathml_str = get_fraction_mathml(mathml_stag_str, surf_mathml, '0') + else: + mathml_str = get_fraction_mathml(mathml_pos_str, surf_mathml, '0') elif len(ccg_node) == 1: mathml_str_child = convert_node_to_mathml(ccg_node[0], sem_tree, tokens) rule = ccg_node.get('rule') From 2a5e7e22a58c681bd6d89c342b9b11059da2c2f7 Mon Sep 17 00:00:00 2001 From: ginesam Date: Thu, 26 Jul 2018 10:30:03 +0900 Subject: [PATCH 04/12] stag visualization fix --- scripts/visualization_tools.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/visualization_tools.py b/scripts/visualization_tools.py index cc5a29c9..aa63c91a 100644 --- a/scripts/visualization_tools.py +++ b/scripts/visualization_tools.py @@ -124,7 +124,8 @@ def convert_node_to_mathml(ccg_node, sem_tree, tokens): pos = token.get('pos') pos_mathml = get_pos_mathml(pos) stag = token.get('stag') - stag_mathml = get_stag_mathml(stag) + if stag: + stag_mathml = get_stag_mathml(stag) entity = token.get('entity') if not entity == None: entity_mathml = get_entity_mathml(entity) From 4debbd3c3b4e2563dace6d9fb0b6787306dd3470 Mon Sep 17 00:00:00 2001 From: ginesam Date: Wed, 8 Aug 2018 14:12:43 +0200 Subject: [PATCH 05/12] semtagger script --- README.md | 11 +++++++---- en/install_semtagger.sh | 10 ++++++++++ 2 files changed, 17 insertions(+), 4 deletions(-) create mode 100755 en/install_semtagger.sh diff --git a/README.md b/README.md index 8ab65d6f..bf4ce09d 100644 --- a/README.md +++ b/README.md 
@@ -73,15 +73,18 @@ Simply do: The command above will download Jigg, its models, and create the file `ja/jigg_location.txt` where the path to Jigg is specified. That is all. -### Installing [semtagger](https://github.com/ginesam/semtagger) (optional) +### Installing [semtagger](https://github.com/ginesam/semtagger) (for English, optional) -The tagger can be obtained directly from its repository: +You can optionally download and install a semantic tagger by running the following +script from the ccg2lambda directory: ```bash -git clone https://github.com/ginesam/semtagger "/path/to/semtagger/" +./en/install_semtagger.sh ``` -Note that after downloading, you must follow the instructions given in order to train a tagging model. +This will generate a file `en/semtagger_location.txt` with the path to the semantic tagger. +Note that after downloading, you must follow the instructions given [here](https://github.com/ginesam/semtagger) in order to train a +tagging model. ## Using the Semantic Parser diff --git a/en/install_semtagger.sh b/en/install_semtagger.sh new file mode 100755 index 00000000..bb5d26af --- /dev/null +++ b/en/install_semtagger.sh @@ -0,0 +1,10 @@ +#!/bin/bash +# +# Download semtagger from https://github.com/ginesam/semtagger + +semtagger_url="https://github.com/ginesam/semtagger.git" +semtagger_dir=`pwd`"/"semtagger + +git clone https://github.com/ginesam/semtagger $semtagger_dir +echo $semtagger_dir > en/semtagger_location.txt + From 3350f8d42f822c8512578d64f652d86d4b8d1862 Mon Sep 17 00:00:00 2001 From: ginesam Date: Thu, 9 Aug 2018 14:06:29 +0200 Subject: [PATCH 06/12] fracas script --- en/emnlp2015exp.sh | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/en/emnlp2015exp.sh b/en/emnlp2015exp.sh index 6d37b568..868be9ad 100755 --- a/en/emnlp2015exp.sh +++ b/en/emnlp2015exp.sh @@ -65,6 +65,12 @@ parser_cmd="${parser_dir}/bin/candc \ --candc-printer xml \ --input" +# Set a variable with the location of the semtagger tool (if 
used) +semtagger_dir="" +if [ -f en/semtagger_location.txt ]; then + semtagger_dir=`cat en/semtagger_location.txt` +fi + # These variables contain the names of the directories where intermediate # results will be written. plain_dir=${dataset}"_plain" @@ -121,6 +127,25 @@ for f in ${plain_dir}/*.tok; do python en/candc2transccg.py ${parsed_dir}/${base_filename}.candc.xml \ > ${parsed_dir}/${base_filename/.tok/}.xml fi + # inject semantic tag information when using semtagger + if [ -n "$semtagger_dir" ]; then + if [ -f "$semtagger_dir"/run.sh ]; then + cp ${parsed_dir}/${base_filename/.tok/}.xml \ + ${parsed_dir}/${base_filename/.tok/}.xml.old + python scripts/xml2conll.py ${parsed_dir}/${base_filename/.tok/}.xml.old \ + > ${parsed_dir}/${base_filename/.tok/}.off + . ${semtagger_dir}/run.sh --predict \ + --input ${parsed_dir}/${base_filename/.tok/}.off \ + --output ${parsed_dir}/${base_filename/.tok/}.sem + python scripts/xml_add_stag.py \ + ${parsed_dir}/${base_filename/.tok/}.xml.old \ + ${parsed_dir}/${base_filename/.tok/}.sem \ + ${parsed_dir}/${base_filename/.tok/}.xml + rm -f ${parsed_dir}/${base_filename/.tok/}.xml.old + rm -f ${parsed_dir}/${base_filename/.tok/}.off + rm -f ${parsed_dir}/${base_filename/.tok/}.sem + fi + fi done echo From 4a560119956651ecd8a317e2d48365149a16741c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joan=20Gin=C3=A9s=20i=20Ametll=C3=A9?= Date: Fri, 17 Aug 2018 03:53:44 +0200 Subject: [PATCH 07/12] Update fracas.md --- en/fracas.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/en/fracas.md b/en/fracas.md index b69d1230..2c62186f 100644 --- a/en/fracas.md +++ b/en/fracas.md @@ -1,6 +1,6 @@ # Running the RTE pipeline on FraCas. -First, ensure that you have downloaded C&C parser and wrote its location in the file `en/candc_location.txt`. +First, ensure that you have downloaded C&C parser and wrote its location in the file `en/candc_location.txt`. 
Also ensure that you have downloaded semtagger, written its location in the file `en/semtagger_location.txt`, and trained a tagging model if you intend to use semantic templates with semantic tags. Second, you need to download the copy of [FraCaS provided by MacCartney and Manning (2007)](http://www-nlp.stanford.edu/~wcmac/downloads/fracas.xml): From 821fa48635d1941d3d04b7bbcda2bdb8e7d22999 Mon Sep 17 00:00:00 2001 From: ginesam Date: Fri, 17 Aug 2018 03:56:10 +0200 Subject: [PATCH 08/12] added final templates --- ...mantic_templates_en_semtags_emnlp2015.yaml | 570 ++++++++++++++++++ 1 file changed, 570 insertions(+) create mode 100644 en/semantic_templates_en_semtags_emnlp2015.yaml diff --git a/en/semantic_templates_en_semtags_emnlp2015.yaml b/en/semantic_templates_en_semtags_emnlp2015.yaml new file mode 100644 index 00000000..c38d8648 --- /dev/null +++ b/en/semantic_templates_en_semtags_emnlp2015.yaml @@ -0,0 +1,570 @@ +# +# Copyright 2015 Koji Mineshima +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +##### Semantic templates for English using semantic tags ##### + +## NOTE: In some cases we do still check the surface form of words due to the +## existence of certain specialized predicates that are hard to invoke using +## semantic tags in isolation. + + +##### Unary lexical rules ##### + +# existential type-raising from N to NP +- category: NP + rule: lex + semantics: \E F1 F2. exists x. 
(E(x) & F1(x) & F2(x)) + +# Unary rule for negative NPs +- category: NP + rule: lex + semantics: \C F1 F2. - exists x. (C(x) & F1(x) & F2(x)) + child_any_stag: NOT + +# Unary rule for proportional NPs +- category: NP + rule: lex + semantics: \C. C + child_any_base: most + +# existential type-raising for proper nouns from N to NP +- category: NP + rule: lex + semantics: \E F1 F2. exists x. ((x = E) & F1(E) & F2(E)) + child0_costag: NAM + +- category: NP + rule: lex + semantics: \E F1 F2. exists x. ((x = E) & F1(E) & F2(E)) + child_any_costag: NAM + child_any_stag: DIS + +- category: N\N + rule: lex + semantics: \V F x. (V(\G1 G2.G2(x)) & F(x)) + +- category: NP\NP + rule: lex + semantics: \V Q F1. Q(\x.(V(\F2 F3.F3(x)) & F1(x))) + +- category: S[X=true]/(S[X=true]\NP) + rule: tr + semantics: \Q V. V(Q) + + +##### Binary lexical rules ##### + +## universal interpretation of plural definite descriptions +- category: NP + rule: fa + semantics: \L F1 F2 F3. forall x. (F1(x) -> (F2(x) -> F3(x))) + child0_stag: DEF + child_any_pos: NNPS + +## Conjunction +- category: S\S + rule: conj + semantics: \L S1 S2. (S1 & S2) + child0_stag: AND + +- category: NP\NP + rule: conj + semantics: \L Q1 Q2 F1 F2. (Q2(F1, F2) & Q1(F1, F2)) + child0_stag: GRP + +- category: NP\NP + rule: conj + semantics: \L Q1 Q2 F1 F2. (Q2(F1, F2) & Q1(F1, F2)) + child0_stag: AND + +- category: N\N + rule: conj + semantics: \L F1 F2 x. (F1(x) & F2(x)) + child0_stag: AND + +- category: (S\NP)\(S\NP) + rule: conj + semantics: \L V1 V2 Q. Q(\w.TrueP, \x.(V1(\F1 F2.F2(x)) & V2(\F1 F2.F2(x)))) + child0_stag: COO + +- category: (N/N)\(N/N) + rule: conj + semantics: \L M1 M2 F x. M1(M2(F),x) + child0_stag: AND + +- category: (N/N)/(N/N) + rule: conj + semantics: \L M1 M2 F x. M1(M2(F),x) + child0_stag: AND + +## Disjunction +- category: S\S + rule: conj + semantics: \L S1 S2. (S1 & S2) + child0_stag: DIS + +- category: NP\NP + rule: conj + semantics: \L Q1 Q2 F1 F2. 
(Q2(F1, F2) | Q1(F1, F2)) + child0_stag: DIS + +- category: N/N + rule: conj + semantics: \L F1 F2 x. (F1(x) | F2(x)) + child0_stag: DIS + +- category: N\N + rule: conj + semantics: \L F1 F2 x. (F1(x) | F2(x)) + child0_stag: DIS + +- category: (S\NP)\(S\NP) + rule: conj + semantics: \L V1 V2 Q. Q(\w.TrueP, \x.(V1(\F1 F2.F2(x)) | V2(\F1 F2.F2(x)))) + child0_stag: DIS + +- category: (N/N)\(N/N) + rule: conj + semantics: \L M1 M2 F x. (M1(F,x) | M2(F,x)) + child0_stag: DIS + +- category: (N/N)/(N/N) + rule: conj + semantics: \L M1 M2 F x. M1(M2(F),x) + child0_stag: DIS + +## sentence final particle +- category: S + rule: rp + semantics: \S D. S + +## Rules for commas +- category: S + rule: lp + semantics: \L S. S + child0_stag: NIL + +- category: NP + rule: rp + semantics: \L R. L + child0_stag: NIL + +- category: S\NP + rule: rp + semantics: \L R. L + child0_stag: NIL + +- category: NP\NP + rule: conj + semantics: \L Q1 Q2 F1 F2. (Q2(F1, F2) & Q1(F1, F2)) + child0_stag: EQU + +- category: NP\NP + rule: conj + semantics: \L Q1 Q2 F1 F2. (Q2(F1, F2) | Q1(F1, F2)) + child0_stag: NIL + child_any_stag: DIS + +- category: S\S + rule: conj + semantics: \L S. S + child0_stag: NIL + + +##### Noun Phrases and Pronouns ##### + +## Common nouns +- category: N + semantics: \E x. E(x) + coq_type: Entity -> Prop + +## Proper nouns +- category: N + semantics: \E. E + costag: NAM + +- category: N + semantics: \E. E + costag: UNE + pos: NNP + +# default existential interpretation +- category: NP + semantics: \E F1 F2. exists x. (F1(x) & F2(x)) + +- category: NP + semantics: \E F1 F2. forall x. (F1(x) -> F2(x)) + stag: AND + +- category: NP + semantics: \E F1 F2. forall x. (_people(x) -> (F1(x) -> F2(x))) + surf: everyone + +- category: NP + semantics: \E F1 F2. exists x. (two(x) & F1(x) & F2(x)) + surf: both + +- category: NP + semantics: \E F1 F2. (exists x. (F1(x) & two(x)) & forall x. 
(F1(x) -> - F2(x))) + surf: neither + + +##### Determiners ##### + +# default existential interpretation +- category: NP[nb=true]/N + semantics: \E F1 F2 F3. exists x. (F1(x) & F2(x) & F3(x)) + +- category: NP[nb=true]/N + semantics: \E F1 F2 F3. forall x. (F1(x) -> (F2(x) -> F3(x))) + stag: AND + +- category: NP[nb=true]/N + semantics: \E F1 F2 F3. exists x. (F1(x) & F2(x) & F3(x)) + stag: DIS + +- category: NP[nb=true]/N + semantics: \E F1 F2 F3. exists x. (F1(x) & F2(x) & F3(x)) + stag: DEF + +- category: NP[nb=true]/N + semantics: \E F1 F2 F3. forall x. (F1(x) -> (F2(x) -> - F3(x))) + stag: NOT + +- category: NP[nb=true]/N + semantics: \E F1 F2 F3. Most(\x(F1(x) & F2(x)), F3) + surf: most + +- category: NP[nb=true]/N + semantics: \E F1 F2 F3. exists x. (two(x) & F1(x) & F2(x) & F3(x)) + surf: both + +- category: NP[nb=true]/N + semantics: \E F1 F2 F3. (exists x. (F1(x) & two(x)) & forall x. (F1(x) -> (F2(x) -> - F3(x)))) + surf: neither + +- category: NP/NP + semantics: \E Q F1 F2. forall x. (Q(\w.TrueP, \y.((x = y) & F1(y))) -> F2(x)) + stag: AND + +- category: NP\NP + semantics: \E Q F1 F2. forall x. (Q(\w.TrueP, \y.((x = y) & F1(y))) -> F2(x)) + stag: AND + + +##### Adverbs ##### + +- category: (N/N)/(N/N) + semantics: \E A F x. (E(x) & A(F)(x)) + coq_type: Entity -> Prop + +- category: (S\NP)\(S\NP) + semantics: \E V Q. Q(\w.TrueP, \x.(E(x) & V(\F1 F2.F2(x)))) + coq_type: Entity -> Prop + +- category: (S\NP)/(S\NP) + semantics: \E V Q. Q(\w.TrueP, \x.(E(x) & V(\F1 F2.F2(x)))) + coq_type: Entity -> Prop + +- category: (S/S)/NP + semantics : \E Q S. (E(Q(\w.TrueP, \w.TrueP),S) & S) + coq_type: Prop -> Prop -> Prop + +- category: ((S\NP)\(S\NP))/S[dcl=true] + semantics: \E S V Q. Q(\w.TrueP, \x.(S & V(\F1 F2.F2(x)) & E(S, V(\F1 F2.F2(x))))) + coq_type: Entity -> Prop + +- category: ((S\NP)\(S\NP))/((S\NP)\(S\NP)) + semantics: \E A V Q. 
Q(\w.TrueP, \x.(A(V)(\F1 F2.(F2(x) & E(x))))) + coq_type: Entity -> Prop + + +##### Modifiers ##### +- category: N/N + semantics: \E F x. (E(x) & F(x)) + coq_type: Entity -> Prop + +- category: N/N + semantics: \E F1 F2 F3. Most(\x.(F1(x) & F2(x)), F3) + surf: most + +- category: N\N + semantics: \E F x. (E(x) & F(x)) + coq_type: Entity -> Prop + +- category: (N/N)\NP + semantics: \E Q F x. Q(\w.TrueP, \y.(E(x,y) & F(x))) + coq_type: Entity -> Entity -> Prop + + +##### Prepositions ##### + +- category: (NP\NP)/NP + semantics: \E Q1 Q2 F1. Q2(\x.(Q1(\w.TrueP, \y.E(x,y)) & F1(x))) + coq_type: Entity -> Entity -> Prop + +- category: PP/NP + semantics: \E Q x. Q(\w.TrueP, \y.E(x,y)) + coq_type: Entity -> Entity -> Prop + +- category: PP/(S[ng=true]\NP) + semantics: \E V x. V(\F1 F2.F2(x)) + coq_type: Entity -> Entity -> Prop + +- category: (NP\NP)/S + semantics: \E S Q F1 F2. (Q(F1,F2) & S) + +- category: ((S\NP)\(S\NP))/NP + semantics: \E Q1 V Q2. Q2(\w.TrueP, \x.Q1(\w.TrueP, \y.(E(x,y) & V(\F1 F2.F2(x))))) + coq_type: Entity -> Entity -> Prop + +- category: ((S\NP)\(S\NP))/(S[ng=true]\NP) + semantics: \E V1 V2 Q. Q(\w.TrueP, \x.V1(\F1 F2.(V2(\G1 G2.G2(x)) & F2(x)))) + +- category: ((S\NP)\(S\NP))/(S[ng=true]\NP) + semantics: \E V1 V2 Q. Q(\w.TrueP, \x.V1(\F1 F2.(V2(\G1 G2.G2(x)) & -F2(x)))) + stag: NOT + +- category: (NP\NP)/(S[ng=true]\NP) + semantics: \E V Q F. Q(\x.V(\G1 G2.(G2(x) & F(x)))) + + +##### Verb phrases ##### + +## intransitive verbs +- category: S\NP + semantics: \E Q. Q(\w.TrueP, \x.E(x)) + coq_type: Entity -> Prop + +- category: S\NP + semantics: \E Q. Q(\w.TrueP, \x.(Prog(E(x)))) + coq_type: Entity -> Prop + stag: EXG + +## transitive verbs +- category: (S\NP)/NP + semantics: \E Q1 Q2. Q2(\w.TrueP, \x.Q1(\w.TrueP, \y.E(x,y))) + coq_type: Entity -> Entity -> Prop + +- category: (S\NP)/NP + semantics: \E Q1 Q2. 
Q2(\w.TrueP, \x.Q1(\w.TrueP, \y.Prog(E(x,y)))) + coq_type: Entity -> Entity -> Prop + stag: EXG + +- category: ((S\NP)/PP)/NP + semantics: \E Q1 F Q2. Q2(\w.TrueP, \x.(Q1(\w.TrueP, \y.E(x,y)) & F(x))) + coq_type: Entity -> Entity -> Prop + +- category: (S[dcl=true]\NP)/(S[to=true]\NP) + semantics: \E V Q. Q(\w.TrueP, \x.V(\F1 F2.E(x,F2(x)))) + coq_type: Entity -> Prop -> Prop + +- category: ((S\NP)/(S[ng=true]\NP))/NP + semantics: \E Q1 V Q2. Q2(\w.TrueP, \x.E(x,Q1(\w.TrueP, \y.V(\F1 F2.F2(y))))) + coq_type: Entity -> Entity -> Entity -> Prop + +- category: ((S[dcl=true]\NP)/(S[to=true]\NP))/NP + semantics: \E Q1 V Q2. Q2(\w.TrueP, \x.E(x,Q1(\w.TrueP, \y.V(\F1 F2.F2(y))))) + coq_type: Entity -> Prop -> Prop + +- category: ((S\NP)/NP)/NP + semantics: \E Q1 Q2 Q3. Q3(\w.TrueP,\x1.Q2(\w.TrueP,\x2.Q1(\w.TrueP,\x3.E(x1,x2,x3)))) + coq_type: Entity -> Entity -> Entity -> Prop + +- category: (S\NP)/PP + semantics: \E F Q. Q(\w.TrueP, \x.(E(x) & F(x))) + coq_type: Entity -> Prop + +- category: (S\NP)/S + semantics: \E S Q. Q(\w.TrueP, \x.E(x,S)) + coq_type: Entity -> Prop -> Prop + +- category: (S\NP)/S[em=true] + semantics: \E S Q. Q(\w.TrueP, \x.E(x,S)) + costag: EVE + +- category: (S\NP)/S[qem=true] + semantics: \E S Q. Q(\w.TrueP, \x.E(x,S)) + costag: EVE + +- category: ((S[dcl=true]\NP)/(S[b=true]\NP))/NP + semantics: \E Q1 V Q2. Q2(\w.TrueP, \x.Q1(\w.TrueP, \y.(E(x,y) & V(\F1 F2.F2(y))))) + coq_type: Entity -> Entity -> Prop + + +##### Copula ##### + +- category: (S\NP)/NP + semantics: \E Q1 Q2. Q2(\w.TrueP, \x.Q1(\w.TrueP, \y.(x = y))) + base: be + +- category: (S[dcl=true]\NP)/PP + semantics: \E F Q. Q(\w.TrueP, F) + costag: TNS + +- category: (S\NP)/(S[pss=true]\NP) + semantics: \E V Q. Q(\w.TrueP, \x.V(\F1 F2.F2(x))) + +- category: (S\NP)/(S[adj=true]\NP) + semantics: \E X. X + costag: TNS + +- category: (S\NP)/(S[adj=true]\NP) + semantics: \E X. X + costag: TNS + +- category: ((S[dcl=true]\NP[expl=true])/S[em=true])/(S[adj=true]\NP) + semantics: \E V S Q. 
V(\F1 F2.F2(S)) + +- category: (S[dcl=true]\(S[adj=true]\NP))/NP + semantics: \E Q1 Q2. Q2(\w.TrueP, \x.Q1(\w.TrueP, \y.E(x,y))) + coq_type: Entity -> Entity -> Prop + + +##### Negation ##### + +- category: (S\NP)\(S\NP) + semantics: \E V Q. Q(\w.TrueP, \x.-V(\F1 F2.F2(x))) + stag: NOT + +- category: (S[adj=true]\NP)/(S[adj=true]\NP) + semantics: \E V Q. Q(\w.TrueP, \x.-V(\F1 F2.F2(x))) + stag: NOT + + +##### Adjectives ##### + +- category: S[adj=true]\NP + semantics: \E Q. Q(\w.TrueP, \x.E(x)) + coq_type: Entity -> Prop + +- category: S[adj=true]\NP + semantics: \E Q. Q(\w.TrueP, \x.E(x)) + stag: IST + +- category: S[adj=true]\NP + semantics: \E Q. Q(\w.TrueP, \x.E(x)) + stag: SST + +- category: N/N + semantics: \E F x. E(F(x)) + stag: PST + +- category: (S[adj=true]\NP)/S[em=true] + semantics: \E S Q. Q(\w.TrueP, \x.E(x,S)) + coq_type: Entity -> Prop -> Prop + + +##### Auxiliary verbs ##### + +- category: (S[dcl=true]\NP)/(S[b=true]\NP) + semantics: \E V Q. Q(\w.TrueP, \x.V(\F1 F2.E(F2(x)))) + coq_type: Prop -> Prop + +- category: (S[dcl=true]\NP)/(S[b=true]\NP) + semantics: \E V. V + costag: TNS + + +##### Floating quantifiers ##### + +- category: (S\NP)/(S\NP) + semantics: \E V Q. forall x. (Q(\w.TrueP, \y.(x = y)) -> V(\F1 F2.F2(x))) + stag: AND + +- category: (S\NP)\(S\NP) + semantics: \E V Q. forall x. (Q(\w.TrueP, \y.(x = y)) -> V(\F1 F2.F2(x))) + stag: AND + + +##### Relative clauses ##### + +- category: (NP\NP)/(S[dcl=true]\NP) + semantics: \E V Q F1. Q(\x. (V(\F2 F3. F3(x)) & F1(x))) + +- category: NP/(S[dcl=true]/NP) + semantics: \E V F1 F2. exists x. (V(\G1 G2.G2(x)) & F1(x) & F2(x)) + +- category: (NP\NP)/(S[dcl=true]/NP) + semantics: \E V Q F1. Q(\x. (V(\F2 F3. F3(x)) & F1(x))) + + +##### Complementizers ##### + +- category: S[em=true]/S[dcl=true] + semantics: \E X. X + + +##### Connectives ##### + +## default conjunctive interpretation +- category: (S/S)/S[dcl=true] + semantics: \E S1 S2. 
(S1 & S2 & E(S1, S2)) + coq_type: Prop -> Prop -> Prop + +- category: (S/S)/S[dcl=true] + semantics: \E S1 S2. (S1 -> S2) + stag: IMP + +- category: (S/S)/S[dcl=true] + semantics: \E S1 S2. (S1 & S2) + costag: DSC + +- category: ((S\NP)\(S\NP))/S[dcl=true] + semantics: \E S V Q. E(S, V(Q)) + stag: REL + +- category: (S\NP)/(S\NP) + semantics: \E V Q. V(Q) + stag: ALT + +- category : N/N + semantics: \E X.X + costag: LOG + + +##### Semantically empty expressions ##### + +- category: NP[thr=true] + semantics: \E F1 F2. exists x.F2(x) + +- category: (S[to=true]\NP)/(S[b=true]\NP) + semantics: \E X. X + +- category: S[asup=true]\NP + semantics: \E X. X + +- category: (S/S)/(S[asup=true]\NP) + semantics: \E X Y. Y + + +##### Possessive particle ##### + +- category: (NP[nb=true]/N)\NP + semantics: \E Q F1 F2 F3. exists x.((Q(\w.TrueP, \y.Rel(x, y)) & F1(x)) & F2(x) & F3(x)) + stag: HAS + + +##### Sentence final particle ##### + +- category: . + semantics: \S X. X + stag: NIL + +- category: =true, + semantics: \S X. 
X + stag: NIL + + From 01ac18aeed2c5e30d96ad028535105ef1f75c922 Mon Sep 17 00:00:00 2001 From: ginesam Date: Fri, 17 Aug 2018 04:03:46 +0200 Subject: [PATCH 09/12] adding script --- scripts/xml_add_stag.py | 110 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) diff --git a/scripts/xml_add_stag.py b/scripts/xml_add_stag.py index 1c0d5c19..b7dbdaf9 100644 --- a/scripts/xml_add_stag.py +++ b/scripts/xml_add_stag.py @@ -19,6 +19,108 @@ import string from lxml import etree +# define the mapping from fine to coarse sem-tags +fine2coarse = dict() + +# anaphoric +fine2coarse['PRO'] = 'ANA' +fine2coarse['DEF'] = 'ANA' +fine2coarse['HAS'] = 'ANA' +fine2coarse['REF'] = 'ANA' +fine2coarse['EMP'] = 'ANA' + +# speech act +fine2coarse['GRE'] = 'ACT' +fine2coarse['ITJ'] = 'ACT' +fine2coarse['HES'] = 'ACT' +fine2coarse['QUE'] = 'ACT' + +# attribute +fine2coarse['QUC'] = 'ATT' +fine2coarse['QUV'] = 'ATT' +fine2coarse['COL'] = 'ATT' +fine2coarse['IST'] = 'ATT' +fine2coarse['SST'] = 'ATT' +fine2coarse['PRI'] = 'ATT' +fine2coarse['DEG'] = 'ATT' +fine2coarse['INT'] = 'ATT' +fine2coarse['REL'] = 'ATT' +fine2coarse['SCO'] = 'ATT' + +# comparative +fine2coarse['EQU'] = 'COM' +fine2coarse['MOR'] = 'COM' +fine2coarse['LES'] = 'COM' +fine2coarse['TOP'] = 'COM' +fine2coarse['BOT'] = 'COM' +fine2coarse['ORD'] = 'COM' + +# unnamed entity +fine2coarse['CON'] = 'UNE' +fine2coarse['ROL'] = 'UNE' +fine2coarse['GRP'] = 'UNE' + +# deixis +fine2coarse['DXP'] = 'DXS' +fine2coarse['DXT'] = 'DXS' +fine2coarse['DXD'] = 'DXS' + +# logical +fine2coarse['ALT'] = 'LOG' +fine2coarse['XCL'] = 'LOG' +fine2coarse['NIL'] = 'LOG' +fine2coarse['DIS'] = 'LOG' +fine2coarse['IMP'] = 'LOG' +fine2coarse['AND'] = 'LOG' + +# modality +fine2coarse['NOT'] = 'MOD' +fine2coarse['NEC'] = 'MOD' +fine2coarse['POS'] = 'MOD' + +# discourse +fine2coarse['SUB'] = 'DSC' +fine2coarse['COO'] = 'DSC' +fine2coarse['APP'] = 'DSC' +fine2coarse['BUT'] = 'DSC' + +# named entity +fine2coarse['PER'] = 'NAM' 
+fine2coarse['GPE'] = 'NAM' +fine2coarse['GPO'] = 'NAM' +fine2coarse['GEO'] = 'NAM' +fine2coarse['ORG'] = 'NAM' +fine2coarse['ART'] = 'NAM' +fine2coarse['HAP'] = 'NAM' +fine2coarse['UOM'] = 'NAM' +fine2coarse['CTC'] = 'NAM' +fine2coarse['URL'] = 'NAM' +fine2coarse['LIT'] = 'NAM' +fine2coarse['NTH'] = 'NAM' + +# events +fine2coarse['EXS'] = 'EVE' +fine2coarse['ENS'] = 'EVE' +fine2coarse['EPS'] = 'EVE' +fine2coarse['EXG'] = 'EVE' +fine2coarse['EXT'] = 'EVE' + +# tense and aspect +fine2coarse['NOW'] = 'TNS' +fine2coarse['PST'] = 'TNS' +fine2coarse['FUT'] = 'TNS' +fine2coarse['PRG'] = 'TNS' +fine2coarse['PFT'] = 'TNS' + +# temporal entity +fine2coarse['DAT'] = 'TIM' +fine2coarse['DOM'] = 'TIM' +fine2coarse['YOC'] = 'TIM' +fine2coarse['DOW'] = 'TIM' +fine2coarse['MOY'] = 'TIM' +fine2coarse['DEC'] = 'TIM' +fine2coarse['CLO'] = 'TIM' + # source xml file to inject tokens to ifile = sys.argv[1] @@ -53,6 +155,10 @@ word_index = 0 for token in sent[0].findall('token'): token.set('stag', stags[sent_index][word_index]) + if stags[sent_index][word_index] in fine2coarse: + token.set('costag', fine2coarse[stags[sent_index][word_index]]) + else: + token.set('costag', 'UNK') word_index = word_index + 1 word_index = 0 @@ -60,6 +166,10 @@ surf = span.get('surf') if surf: span.set('stag', stags[sent_index][word_index]) + if stags[sent_index][word_index] in fine2coarse: + span.set('costag', fine2coarse[stags[sent_index][word_index]]) + else: + span.set('costag', 'UNK') word_index = word_index + 1 # write out result From 6ca9478ca7dd30f05d2a9b646891481c8ba3e871 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joan=20Gin=C3=A9s=20i=20Ametll=C3=A9?= Date: Fri, 17 Aug 2018 04:06:15 +0200 Subject: [PATCH 10/12] Update fracas.md --- en/fracas.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/en/fracas.md b/en/fracas.md index 2c62186f..6aafc354 100644 --- a/en/fracas.md +++ b/en/fracas.md @@ -16,7 +16,14 @@ git checkout tags/fracas ./en/emnlp2015exp.sh 
en/semantic_templates_en_emnlp2015.yaml fracas.xml ``` -This script will: +If you are using semantic tags in your template, you can similarly do: + +```bash +git checkout semtag-fracas +./en/emnlp2015exp.sh en/semantic_templates_en_emnlp2015.yaml fracas.xml +``` + +The scripts will: 1. Extract the plain text corresponding to the hypotheses and conclusions of all fracas problems. These hypotheses and conclusions are stored in a different file for each fracas problem, under the directory `fracas.xml_plain`. The gold entailment judgment is stored in files `fracas.xml_plain/*.answer`. 2. Parse the hypotheses and conclusions using C&C parser, and save them under the directory `fracas.xml_parsed`. From 44e9f3e11252b6990b481f6b8442685f8b9f27d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joan=20Gin=C3=A9s=20i=20Ametll=C3=A9?= Date: Fri, 17 Aug 2018 04:27:37 +0200 Subject: [PATCH 11/12] Update fracas.md --- en/fracas.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/en/fracas.md b/en/fracas.md index 6aafc354..eb11f5ad 100644 --- a/en/fracas.md +++ b/en/fracas.md @@ -20,7 +20,7 @@ If you are using semantic tags in your template, you can similarly do: ```bash git checkout semtag-fracas -./en/emnlp2015exp.sh en/semantic_templates_en_emnlp2015.yaml fracas.xml +./en/emnlp2015exp.sh en/semantic_templates_en_semtags_emnlp2015.yaml fracas.xml ``` The scripts will: From aaa34344b5fe52f467e214ba746c9329cdcf33ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joan=20Gin=C3=A9s=20i=20Ametll=C3=A9?= Date: Fri, 17 Aug 2018 04:27:58 +0200 Subject: [PATCH 12/12] Update fracas.md --- en/fracas.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/en/fracas.md b/en/fracas.md index eb11f5ad..951b56c4 100644 --- a/en/fracas.md +++ b/en/fracas.md @@ -16,7 +16,7 @@ git checkout tags/fracas ./en/emnlp2015exp.sh en/semantic_templates_en_emnlp2015.yaml fracas.xml ``` -If you are using semantic tags in your template, you can similarly do: +If you are using semantic tags in 
your templates, you can similarly do: ```bash git checkout semtag-fracas