Skip to content

Commit 24b09ef

Browse files
authored
test: multivec py (#31)
1 parent 9484a64 commit 24b09ef

File tree

6 files changed

+34
-4
lines changed

6 files changed

+34
-4
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,4 +22,5 @@ poetry.lock
2222
.pytest_cache/
2323
*_pycache__
2424

25-
senv
25+
senv
26+
.venv

pom.xml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
<modelVersion>4.0.0</modelVersion>
77
<groupId>io.qdrant</groupId>
88
<artifactId>spark</artifactId>
9-
<version>2.3.1</version>
9+
<version>2.3.2</version>
1010
<name>qdrant-spark</name>
1111
<url>https://github.com/qdrant/qdrant-spark</url>
1212
<description>An Apache Spark connector for the Qdrant vector database</description>
@@ -18,7 +18,7 @@
1818
</licenses>
1919
<developers>
2020
<developer>
21-
<name>Anush Shetty</name>
21+
<name>Anush008</name>
2222
<email>anush.shetty@qdrant.com</email>
2323
<organization>Qdrant</organization>
2424
<organizationUrl>http://qdrant.tech</organizationUrl>

src/test/python/conftest.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -89,6 +89,11 @@ def qdrant():
8989
size=QDRANT_EMBEDDING_DIM,
9090
distance=QDRANT_DISTANCE,
9191
),
92+
"multi": models.VectorParams(
93+
size=QDRANT_EMBEDDING_DIM,
94+
distance=QDRANT_DISTANCE,
95+
multivector_config=models.MultiVectorConfig(comparator=models.MultiVectorComparator.MAX_SIM)
96+
)
9297
},
9398
sparse_vectors_config={
9499
"sparse": models.SparseVectorParams(),

src/test/python/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
11
pyspark==3.5.1
22
pytest==8.2.0
3-
qdrant-client==1.9.0
3+
qdrant-client==1.10.1
44
testcontainers==4.4.0

src/test/python/schema.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -102,5 +102,6 @@
102102
StructField("dense_vector", ArrayType(FloatType()), nullable=False),
103103
StructField("sparse_indices", ArrayType(IntegerType()), nullable=False),
104104
StructField("sparse_values", ArrayType(FloatType()), nullable=False),
105+
StructField("multi", ArrayType(ArrayType(FloatType())), nullable=False),
105106
]
106107
)

src/test/python/test_qdrant_ingest.py

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -197,6 +197,29 @@ def test_upsert_multiple_sparse_dense_vectors(
197197
qdrant.client.count(qdrant.collection_name).count == df.count()
198198
), "Uploaded points count is not equal to the dataframe count"
199199

200+
def test_upsert_multi_vector(
201+
qdrant: Qdrant, spark_session: SparkSession
202+
):
203+
df = (
204+
spark_session.read.schema(schema)
205+
.option("multiline", "true")
206+
.json(str(input_file_path))
207+
)
208+
opts = {
209+
"qdrant_url": qdrant.url,
210+
"collection_name": qdrant.collection_name,
211+
"multi_vector_fields": "multi",
212+
"multi_vector_names": "multi",
213+
"schema": df.schema.json(),
214+
"api_key": qdrant.api_key,
215+
}
216+
217+
df.write.format("io.qdrant.spark.Qdrant").options(**opts).mode("append").save()
218+
219+
assert (
220+
qdrant.client.count(qdrant.collection_name).count == df.count()
221+
), "Uploaded points count is not equal to the dataframe count"
222+
200223

201224
# Test an upsert without vectors. All the dataframe fields will be treated as payload
202225
def test_upsert_without_vectors(qdrant: Qdrant, spark_session: SparkSession):

0 commit comments

Comments
 (0)