Skip to content

Commit b45c552

Browse files
committed
Added embedding visitor to allow search by embeddings
1 parent 3c2ad6b commit b45c552

File tree

8 files changed

+343
-3
lines changed

8 files changed

+343
-3
lines changed

composer.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
"php": "^7.4 || ^8.0",
1717
"ext-json": "*",
1818
"ext-xmlwriter": "*",
19-
"ibexa/core": "~4.6.0@dev",
19+
"ibexa/core": "dev-taxonomy-suggestions as 4.6.x-dev",
2020
"netgen/query-translator": "^1.0.2",
2121
"symfony/http-kernel": "^5.0",
2222
"symfony/dependency-injection": "^5.0",
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
<?php
2+
3+
/**
4+
* @copyright Copyright (C) Ibexa AS. All rights reserved.
5+
* @license For full copyright and license information view LICENSE file distributed with this source code.
6+
*/
7+
namespace Ibexa\Contracts\Solr\Query;
8+
9+
use Ibexa\Contracts\Core\Repository\Values\Content\Query\Embedding;
10+
11+
/**
 * Base contract for visitors that turn an Embedding query value into a string
 * usable in a Solr request.
 *
 * NativeQueryConverter appends the string returned by visit() to the Solr
 * `fq` parameter list.
 */
abstract class EmbeddingVisitor
{
    /**
     * Tells whether this visitor is able to handle the given embedding.
     */
    abstract public function canVisit(Embedding $embedding): bool;

    /**
     * Converts the given embedding into its string representation.
     *
     * @param int $limit NativeQueryConverter passes the query's `limit` here;
     *                   presumably the number of nearest neighbours to request (to be confirmed
     *                   against concrete visitors).
     */
    abstract public function visit(Embedding $embedding, int $limit): string;
}
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
<?php
2+
3+
/**
4+
* @copyright Copyright (C) Ibexa AS. All rights reserved.
5+
* @license For full copyright and license information view LICENSE file distributed with this source code.
6+
*/
7+
namespace Ibexa\Solr\Query\Common\EmbeddingVisitor;
8+
9+
use Ibexa\Contracts\Core\Repository\Exceptions\NotImplementedException;
10+
use Ibexa\Contracts\Core\Repository\Values\Content\Query\Embedding;
11+
use Ibexa\Contracts\Solr\Query\EmbeddingVisitor;
12+
13+
/**
 * Embedding visitor that delegates to the first registered visitor accepting
 * the given embedding.
 */
class Aggregate extends EmbeddingVisitor
{
    /**
     * Delegates, consulted in iteration order.
     *
     * @var iterable<\Ibexa\Contracts\Solr\Query\EmbeddingVisitor>
     */
    protected iterable $visitors = [];

    /**
     * @param iterable<\Ibexa\Contracts\Solr\Query\EmbeddingVisitor> $visitors
     */
    public function __construct(iterable $visitors = [])
    {
        $this->visitors = $visitors;
    }

    /**
     * Returns true when at least one registered visitor accepts the embedding.
     */
    public function canVisit(Embedding $embedding): bool
    {
        return $this->findVisitor($embedding) instanceof EmbeddingVisitor;
    }

    /**
     * Delegates to the first registered visitor that accepts the embedding and
     * returns that visitor's result.
     *
     * @throws \Ibexa\Contracts\Core\Repository\Exceptions\NotImplementedException
     *         when no registered visitor accepts the embedding
     */
    public function visit(Embedding $embedding, int $limit): string
    {
        $delegate = $this->findVisitor($embedding);

        if ($delegate === null) {
            throw new NotImplementedException('No visitor available for: ' . \get_class($embedding));
        }

        return $delegate->visit($embedding, $limit);
    }

    /**
     * Returns the first registered visitor whose canVisit() accepts the
     * embedding, or null when none does.
     */
    private function findVisitor(Embedding $embedding): ?EmbeddingVisitor
    {
        foreach ($this->visitors as $candidate) {
            if (!$candidate->canVisit($embedding)) {
                continue;
            }

            return $candidate;
        }

        return null;
    }
}

src/lib/Query/Common/QueryConverter/NativeQueryConverter.php

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,11 @@
66
*/
77
namespace Ibexa\Solr\Query\Common\QueryConverter;
88

9+
use Ibexa\Contracts\Core\Repository\Values\Content\EmbeddingQuery;
910
use Ibexa\Contracts\Core\Repository\Values\Content\Query;
1011
use Ibexa\Contracts\Solr\Query\AggregationVisitor;
1112
use Ibexa\Contracts\Solr\Query\CriterionVisitor;
13+
use Ibexa\Contracts\Solr\Query\EmbeddingVisitor;
1214
use Ibexa\Contracts\Solr\Query\SortClauseVisitor;
1315
use Ibexa\Solr\Query\FacetFieldVisitor;
1416
use Ibexa\Solr\Query\QueryConverter;
@@ -44,6 +46,8 @@ class NativeQueryConverter extends QueryConverter
4446
*/
4547
private $aggregationVisitor;
4648

49+
private EmbeddingVisitor $embeddingVisitor;
50+
4751
/**
4852
* Construct from visitors.
4953
*
@@ -55,26 +59,32 @@ public function __construct(
5559
CriterionVisitor $criterionVisitor,
5660
SortClauseVisitor $sortClauseVisitor,
5761
FacetFieldVisitor $facetBuilderVisitor,
58-
AggregationVisitor $aggregationVisitor
62+
AggregationVisitor $aggregationVisitor,
63+
EmbeddingVisitor $embeddingVisitor
5964
) {
6065
$this->criterionVisitor = $criterionVisitor;
6166
$this->sortClauseVisitor = $sortClauseVisitor;
6267
$this->facetBuilderVisitor = $facetBuilderVisitor;
6368
$this->aggregationVisitor = $aggregationVisitor;
69+
$this->embeddingVisitor = $embeddingVisitor;
6470
}
6571

6672
public function convert(Query $query, array $languageSettings = [])
6773
{
6874
$params = [
6975
'q' => '{!lucene}' . $this->criterionVisitor->visit($query->query),
70-
'fq' => '{!lucene}' . $this->criterionVisitor->visit($query->filter),
76+
'fq' => ['{!lucene}' . $this->criterionVisitor->visit($query->filter)],
7177
'sort' => $this->getSortClauses($query->sortClauses),
7278
'start' => $query->offset,
7379
'rows' => $query->limit,
7480
'fl' => '*,score,[shard]',
7581
'wt' => 'json',
7682
];
7783

84+
if ($query instanceof EmbeddingQuery && $query->getEmbedding() !== null) {
85+
$params['fq'][] = $this->embeddingVisitor->visit($query->getEmbedding(), $query->limit);
86+
}
87+
7888
$facetParams = $this->getFacetParams($query->facetBuilders);
7989
if (!empty($facetParams)) {
8090
$params['facet'] = 'true';

src/lib/Resources/config/container/solr.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ services:
100100
- '@ibexa.solr.query.content.sort_clause_visitor.aggregate'
101101
- '@ibexa.solr.query.content.facet_builder_visitor.aggregate'
102102
- '@ibexa.solr.query.content.aggregation_visitor.dispatcher'
103+
- '@ibexa.solr.query.content.embedding_visitor.aggregate'
103104

104105
ibexa.solr.query_converter.location:
105106
class: Ibexa\Solr\Query\Common\QueryConverter\NativeQueryConverter
@@ -108,6 +109,7 @@ services:
108109
- '@ibexa.solr.query.location.sort_clause_visitor.aggregate'
109110
- '@ibexa.solr.query.location.facet_builder_visitor.aggregate'
110111
- '@ibexa.solr.query.location.aggregation_visitor.dispatcher'
112+
- '@ibexa.solr.query.content.embedding_visitor.aggregate'
111113

112114
Ibexa\Solr\Gateway\UpdateSerializer:
113115
arguments:

src/lib/Resources/config/container/solr/services.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,11 @@ services:
2828
arguments:
2929
$client: '@ibexa.solr.http_client'
3030

31+
ibexa.solr.query.content.embedding_visitor.aggregate:
32+
class: Ibexa\Solr\Query\Common\EmbeddingVisitor\Aggregate
33+
arguments:
34+
$visitors: !tagged ibexa.search.solr.query.content.embedding.visitor
35+
3136
# Note: services tagged with 'ibexa.search.solr.query.content.criterion.visitor'
3237
# are registered to this one using compilation pass
3338
ibexa.solr.query.content.criterion_visitor.aggregate:
Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
<?xml version="1.0" encoding="UTF-8" ?>
2+
<!DOCTYPE schema [
3+
<!ENTITY langfields SYSTEM "language-fieldtypes.xml">
4+
<!ENTITY customfields SYSTEM "custom-fields-types.xml">
5+
]>
6+
<!--
7+
This is the Solr schema file. This file should be named "schema.xml" and should
8+
be in the conf directory under the solr home (i.e. ./solr/conf/schema.xml by
9+
default) or located where the classloader for the Solr webapp can find it.
10+
11+
It provides the default types and definitions for a functional Solr based
12+
search in eZ Publish 5. You may extend it with your own definitions, but you
13+
should not remove or drastically change the existing definitions.
14+
-->
15+
16+
<schema name="eZ Publish 5 base schema" version="1.5">
17+
<!--
18+
language specific field types are included here, there should be at least
19+
a field type with the name "text" defined.
20+
Included in the eZ platform distribution are configurations for various
21+
languages, including additional files like stopwords or other features
22+
under the directory "solr.languages"
23+
-->
24+
&langfields;
25+
26+
<!--
27+
custom field types and fields are included from a separate file to ease upgrades
28+
-->
29+
&customfields;
30+
31+
<!--
32+
Default types by Solr. Will be reused for dynamic fields.
33+
-->
34+
<fieldType name="string" class="solr.TextField" sortMissingLast="true">
35+
<analyzer type="index">
36+
<tokenizer class="solr.KeywordTokenizerFactory"/>
37+
<filter class="solr.LowerCaseFilterFactory"/>
38+
</analyzer>
39+
<analyzer type="query">
40+
<tokenizer class="solr.KeywordTokenizerFactory"/>
41+
<filter class="solr.LowerCaseFilterFactory"/>
42+
</analyzer>
43+
</fieldType>
44+
45+
<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100" multiValued="true" sortMissingLast="true">
46+
<analyzer type="index">
47+
<tokenizer class="solr.KeywordTokenizerFactory"/>
48+
<filter class="solr.LowerCaseFilterFactory"/>
49+
</analyzer>
50+
<analyzer type="query">
51+
<tokenizer class="solr.KeywordTokenizerFactory"/>
52+
<filter class="solr.LowerCaseFilterFactory"/>
53+
</analyzer>
54+
</fieldType>
55+
56+
<fieldType name="pdate" class="solr.DatePointField" docValues="true"/>
57+
<fieldType name="pdates" class="solr.DatePointField" docValues="true" multiValued="true"/>
58+
<!--
59+
Numeric field types that index values using KD-trees.
60+
Point fields don't support FieldCache, so they must have docValues="true" if needed for sorting, faceting, functions, etc.
61+
-->
62+
<fieldType name="pint" class="solr.IntPointField" docValues="true"/>
63+
<fieldType name="pfloat" class="solr.FloatPointField" docValues="true"/>
64+
<fieldType name="plong" class="solr.LongPointField" docValues="true"/>
65+
<fieldType name="pdouble" class="solr.DoublePointField" docValues="true"/>
66+
67+
<fieldType name="pints" class="solr.IntPointField" docValues="true" multiValued="true"/>
68+
<fieldType name="pfloats" class="solr.FloatPointField" docValues="true" multiValued="true"/>
69+
<fieldType name="plongs" class="solr.LongPointField" docValues="true" multiValued="true"/>
70+
<fieldType name="pdoubles" class="solr.DoublePointField" docValues="true" multiValued="true"/>
71+
<fieldType name="random" class="solr.RandomSortField" indexed="true"/>
72+
73+
<fieldType name="identifier" class="solr.StrField" sortMissingLast="true" />
74+
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" multiValued="false"/>
75+
<fieldType name="booleans" class="solr.BoolField" sortMissingLast="true" multiValued="true"/>
76+
<fieldtype name="binary" class="solr.BinaryField"/>
77+
<fieldType name="int" class="solr.IntPointField" docValues="true"/>
78+
<fieldType name="float" class="solr.FloatPointField" docValues="true"/>
79+
<fieldType name="long" class="solr.LongPointField" docValues="true"/>
80+
<fieldType name="double" class="solr.DoublePointField" docValues="true"/>
81+
<fieldType name="date" class="solr.DatePointField" docValues="true"/>
82+
83+
<fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
84+
<fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
85+
<fieldType name="location" class="solr.LatLonPointSpatialField" sortMissingLast="true"/>
86+
87+
<!-- for 1536-dim models (ada-002 & 3-small) -->
88+
<fieldType name="vector_1536"
89+
class="solr.DenseVectorField"
90+
vectorDimension="1536"
91+
similarityFunction="cosine"
92+
indexed="true"
93+
stored="true" />
94+
95+
<!-- for the 3072-dim model (3-large) -->
96+
<fieldType name="vector_3072"
97+
class="solr.DenseVectorField"
98+
vectorDimension="3072"
99+
similarityFunction="cosine"
100+
indexed="true"
101+
stored="true"/>
102+
103+
<!--
104+
Required ID field.
105+
-->
106+
<field name="id" type="string" indexed="true" stored="true" required="true"/>
107+
108+
<!--
109+
Always contains the date a document was added to the index. Might be
110+
useful.
111+
-->
112+
<field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
113+
114+
<!--
115+
Points to the root document of a block of nested documents. Required for nested document support.
116+
-->
117+
<field name="_root_" type="string" indexed="true" stored="true" required="false"/>
118+
119+
<field name="document_type_id" type="string" indexed="true" stored="true" required="true"/>
120+
121+
<!--
122+
Dynamic field definitions. If a field name is not found, dynamicFields
123+
will be used if the name matches any of the patterns. RESTRICTION: the
124+
glob-like pattern in the name attribute must have a "*" only at the start
125+
or the end. EXAMPLE: name="*_i" will match any field ending in _i (like
126+
myid_i, z_i). Longer patterns will be matched first. If equal-size
127+
patterns both match, the first appearing in the schema will be used.
128+
-->
129+
<dynamicField name="*_i" type="int" indexed="true" stored="true"/>
130+
<dynamicField name="*_mi" type="int" indexed="true" stored="true" multiValued="true"/>
131+
<dynamicField name="*_id" type="identifier" indexed="true" stored="true"/>
132+
<dynamicField name="*_mid" type="identifier" indexed="true" stored="true" multiValued="true"/>
133+
<dynamicField name="*_s" type="string" indexed="true" stored="true"/>
134+
<dynamicField name="*_ms" type="string" indexed="true" stored="true" multiValued="true"/>
135+
<dynamicField name="*_l" type="long" indexed="true" stored="true"/>
136+
<dynamicField name="*_t" type="text" indexed="true" stored="true" multiValued="true" omitNorms="false"/>
137+
<dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
138+
<dynamicField name="*_mb" type="boolean" indexed="true" stored="true" multiValued="true"/>
139+
<dynamicField name="*_f" type="float" indexed="true" stored="true"/>
140+
<dynamicField name="*_d" type="double" indexed="true" stored="true"/>
141+
<dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
142+
<dynamicField name="*_gl" type="location" indexed="true" stored="true"/>
143+
<dynamicField name="*_gl_0_coordinate" type="double" indexed="true" stored="true"/>
144+
<dynamicField name="*_gl_1_coordinate" type="double" indexed="true" stored="true"/>
145+
146+
<!--
147+
This field is required to allow random sorting
148+
-->
149+
<dynamicField name="random*" type="random" indexed="true" stored="false"/>
150+
151+
<!--
152+
These dynamic fields are required for embeddings (dense vector search)
153+
-->
154+
<!-- 1536-dim suffix for ada-002 -->
155+
<dynamicField name="*_ada002_dv" type="vector_1536"/>
156+
157+
<!-- 1536-dim suffix for 3-small -->
158+
<dynamicField name="*_3small_dv" type="vector_1536"/>
159+
160+
<!-- 3072-dim suffix for 3-large -->
161+
<dynamicField name="*_3large_dv" type="vector_3072"/>
162+
163+
<!--
164+
This field is required since Solr 4
165+
-->
166+
<field name="_version_" type="long" indexed="true" stored="true" multiValued="false" />
167+
168+
<uniqueKey>id</uniqueKey>
169+
</schema>

0 commit comments

Comments
 (0)