From 304aa99eea5a0b0574d2bad6e5c3184016684a22 Mon Sep 17 00:00:00 2001 From: shaohuasong-fang Date: Fri, 1 May 2026 14:24:56 +0800 Subject: [PATCH] Fix Skip File Validation When Offline with Existing Datasets When internet access is unavailable and a dataset has already been downloaded locally, the log output shows that file validation against remote servers is still attempted, causing unnecessary failures. --- vectordb_bench/backend/data_source.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/vectordb_bench/backend/data_source.py b/vectordb_bench/backend/data_source.py index 139d2e308..200736c98 100644 --- a/vectordb_bench/backend/data_source.py +++ b/vectordb_bench/backend/data_source.py @@ -86,9 +86,13 @@ def read(self, dataset: str, files: list[str], local_ds_root: pathlib.Path): remote_file = pathlib.PurePosixPath("benchmark", dataset, file) local_file = local_ds_root.joinpath(file) - if (not local_file.exists()) or (not self.validate_file(remote_file, local_file)): - log.info(f"local file: {local_file} not match with remote: {remote_file}; add to downloading list") + # if (not local_file.exists()) or (not self.validate_file(remote_file, local_file)): + # log.info(f"local file: {local_file} not match with remote: {remote_file}; add to downloading list") + if not local_file.exists(): + log.info(f"local file: {local_file} not exist; add to downloading list") downloads.append((remote_file, local_file)) + else: + log.info(f"local file: {local_file} already exists, skip download") if len(downloads) == 0: return @@ -130,9 +134,13 @@ def read(self, dataset: str, files: list[str], local_ds_root: pathlib.Path): remote_file = pathlib.PurePosixPath(self.remote_root, dataset, file) local_file = local_ds_root.joinpath(file) - if (not local_file.exists()) or (not self.validate_file(remote_file, local_file)): - log.info(f"local file: {local_file} not match with remote: {remote_file}; add to downloading list") + # if (not
local_file.exists()) or (not self.validate_file(remote_file, local_file)): + # log.info(f"local file: {local_file} not match with remote: {remote_file}; add to downloading list") + if not local_file.exists(): + log.info(f"local file: {local_file} not exist; add to downloading list") downloads.append(remote_file) + else: + log.info(f"local file: {local_file} already exists, skip download") if len(downloads) == 0: return