|
| 1 | +""" |
| 2 | +Block ud.FixCompoundName finds compound relations between PROPN nodes and converts |
| 3 | +them to flat:name. This is not necessarily correct in all situations. The difference |
| 4 | +between compound and flat is that compound allows one to distinguish head and modifier.
| 5 | +Multiword person names (given name and surname, or various other patterns) typically |
| 6 | +should be analyzed as flat but there are treebanks that incorrectly use compound |
| 7 | +for person names. This block can be used to fix them. |
| 8 | +""" |
| 9 | +from udapi.core.block import Block |
| 10 | +import logging |
| 11 | + |
| 12 | + |
class FixCompoundName(Block):
    """
    Converts a compound relation between two PROPN nodes into a flat relation.
    Compounds of a PROPN and a non-PROPN will be left alone, although they are
    suspicious, too.

    All PROPN words of one name (the current node, its PROPN compound siblings
    and their common PROPN parent) are re-attached so that the leftmost word
    becomes the technical head and every other word depends on it as
    flat:name. Only basic dependencies are transformed.
    """

    def process_node(self, node):
        """Convert the PROPN compound that `node` belongs to into flat:name.

        Nothing happens unless `node` is a PROPN attached as `compound` to a
        PROPN parent. If `node` carries enhanced dependencies, the problem is
        logged and the tree is left untouched.

        Args:
            node: the udapi node currently visited by the block.
        """
        if node.upos != 'PROPN' or node.udeprel != 'compound':
            return
        old_head = node.parent
        if old_head.upos != 'PROPN':
            return

        ###!!! We currently cannot transform enhanced dependencies.
        ###!!! If we proceed, the basic tree would diverge from the enhanced dependencies.
        if node.deps:
            logging.fatal('There are enhanced dependencies but ud.FixCompoundName has been implemented only for basic dependencies.')
            # The standard logging.fatal() only logs; it does not stop the run.
            # Bail out explicitly so that the basic tree stays consistent.
            return

        # Collect every word of the name: the other PROPN compound siblings,
        # the current node itself, and the current head.
        # NOTE(review): node.siblings is expected not to contain `node`;
        # the identity test keeps the list duplicate-free in either case.
        namewords = [x for x in node.siblings
                     if x is not node and x.upos == 'PROPN' and x.udeprel == 'compound']
        namewords.append(node)
        namewords.append(old_head)
        namewords.sort(key=lambda x: x.ord)

        # The first name word will be the technical head. If it is the current
        # head, fine. Otherwise it takes over the old head's attachment; the
        # values are read before any re-parenting so that they cannot be
        # affected by the changes made below.
        new_head = namewords[0]
        if new_head is not old_head:
            outer_parent = old_head.parent
            outer_deprel = old_head.deprel
            new_head.parent = outer_parent
            new_head.deprel = outer_deprel

        # Every remaining name word hangs directly on the technical head.
        for word in namewords[1:]:
            word.parent = new_head
            word.deprel = 'flat:name'
0 commit comments