From 56154b3ac5a20527655c5a49f4997266ec38e18b Mon Sep 17 00:00:00 2001 From: "suzuki.shuto" Date: Fri, 8 Apr 2022 15:02:41 +0900 Subject: [PATCH] Fix `KeyedVectors.add_vectors()` error when using `most_similar()` --- gensim/models/keyedvectors.py | 4 +++- gensim/test/test_keyedvectors.py | 5 +++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/gensim/models/keyedvectors.py b/gensim/models/keyedvectors.py index 54fa631778..54b558ff28 100644 --- a/gensim/models/keyedvectors.py +++ b/gensim/models/keyedvectors.py @@ -614,6 +614,8 @@ def add_vectors(self, keys, weights, extras=None, replace=False): for attr, extra in extras: self.expandos[attr][in_vocab_idxs] = extra[in_vocab_mask] + self.fill_norms() + def __setitem__(self, keys, weights): """Add keys and theirs vectors in a manual way. If some key is already in the vocabulary, old vector is replaced with the new one. @@ -705,7 +707,7 @@ def fill_norms(self, force=False): either recalculated or 'None', to trigger a full recalculation later on-request. """ - if self.norms is None or force: + if self.norms is None or len(self) != len(self.norms) or force: self.norms = np.linalg.norm(self.vectors, axis=1) @property diff --git a/gensim/test/test_keyedvectors.py b/gensim/test/test_keyedvectors.py index cc70577842..ea4e02b251 100644 --- a/gensim/test/test_keyedvectors.py +++ b/gensim/test/test_keyedvectors.py @@ -265,6 +265,11 @@ def test_add_multiple(self): for ent, vector in zip(entities, vectors): self.assertTrue(np.allclose(kv[ent], vector)) + # assert `len(kv)` == `len(kv.norms)` after `fill_norms()` + kv.fill_norms() + kv.add_vectors(["___not_present_in_keyed_vectors___"], [np.random.randn(self.vectors.vector_size)], replace=False) + self.assertEqual(len(kv), len(kv.norms)) + def test_add_type(self): kv = KeyedVectors(2) assert kv.vectors.dtype == REAL