7
7
for person names. This block can be used to fix them.
8
8
"""
9
9
from udapi .core .block import Block
10
+ import regex as re
10
11
import logging
11
12
12
13
@@ -19,18 +20,26 @@ class FixCompoundName(Block):
19
20
20
21
def process_node(self, node):
    """Re-attach a PROPN compound name so that its first word is the head.

    The transformation is triggered when ``node`` is a PROPN attached as
    ``compound`` to a PROPN parent. The name words are the parent plus all
    of the parent's PROPN children whose universal deprel is ``compound``,
    in word order. The first name word takes over the parent's original
    head and deprel; every other name word is attached to it as
    ``flat:name``. Nothing is changed if any name word contains a digit.

    Args:
        node: the node to examine; modified in place together with its
            parent and compound siblings when the pattern matches.
    """
    if node.upos != 'PROPN' or node.udeprel != 'compound':
        return
    origparent = node.parent
    if origparent.upos != 'PROPN':
        return
    # Remember the outgoing attachment before any re-attachment happens.
    grandparent = origparent.parent
    outdeprel = origparent.deprel
    # Collect the current parent and all its PROPN compound children.
    namewords = sorted(
        [x for x in origparent.children(add_self=True)
         if x.upos == 'PROPN' and (x.udeprel == 'compound' or x == origparent)],
        key=lambda y: y.ord)
    # The Hindi treebank tags dates (['30', 'navaṁbara'], ['disaṁbara', '1993'])
    # as PROPN compounds. This is wrong but it is also different from the
    # personal names we are targeting here. Hence, we skip "names" that
    # contain numbers.
    if any(re.search(r"\d", x.form) for x in namewords):
        return
    ###!!! We currently cannot transform enhanced dependencies.
    ###!!! If we proceed, the basic tree would diverge from the enhanced dependencies.
    if len(node.deps) > 0:
        logging.fatal('There are enhanced dependencies but ud.FixCompoundName has been implemented only for basic dependencies.')
    # The first name word will be the technical head. If it is the current
    # parent, its attachment is already correct.
    head = namewords[0]
    rest = namewords[1:]
    if head != origparent:
        head.parent = grandparent
        head.deprel = outdeprel
    for n in rest:
        n.parent = head
        n.deprel = 'flat:name'
0 commit comments