Skip to content

Commit d18ef43

Browse files
authored
tf-idf embeddings for search (#89)
* Removes PCA for tf-idf embeddings
* PR comments
* submodule
1 parent 5678072 commit d18ef43

File tree

2 files changed

+3
-2
lines changed

2 files changed

+3
-2
lines changed

util/embedders.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@ def get_embedder(
3737
elif model == "bag-of-words":
3838
embedder = BagOfWordsSentenceEmbedder(batch_size=batch_size)
3939
elif model == "tf-idf":
40-
embedder = TfidfSentenceEmbedder(batch_size=batch_size)
40+
return TfidfSentenceEmbedder(batch_size=batch_size, min_df=0)
4141
else:
4242
raise Exception(f"Unknown model {model}")
4343
elif (
@@ -66,6 +66,7 @@ def get_embedder(
6666
raise Exception(f"Unknown platform {platform}")
6767

6868
if record.count(project_id) < n_components:
69+
# no PCA for tf-idf
6970
return embedder
7071
else:
7172
return PCASentenceReducer(embedder, n_components=n_components)

0 commit comments

Comments
 (0)