diff --git a/k8s-deploy/README.md b/k8s-deploy/README.md index cc1f272034..bf69f099c6 100644 --- a/k8s-deploy/README.md +++ b/k8s-deploy/README.md @@ -189,3 +189,46 @@ env: - Lightweight deployment is suitable for testing and small-scale usage, but data persistence and performance may be limited - Production deployment (PostgreSQL + Neo4J) is recommended for production environments and large-scale usage - For more customized configurations, please refer to the official LightRAG documentation + +## Preview / Experimental: DocumentDB Backend + +> ⚠️ **Not for production use.** The +> [DocumentDB Kubernetes operator](https://github.com/documentdb/documentdb-kubernetes-operator) +> is in active development and not yet GA. The integration below is provided as +> a playground so you can try a single MongoDB-compatible backend in place of +> running PostgreSQL **and** Neo4j separately. + +Edit [databases/00-config.sh](databases/00-config.sh): + +```bash +ENABLE_POSTGRESQL=false +ENABLE_NEO4J=false +ENABLE_DOCUMENTDB=true +``` + +Optionally edit the placeholder password in +[databases/documentdb/values.yaml](databases/documentdb/values.yaml), then run +the same install flow as the production path (`01-prepare.sh` → +`02-install-database.sh` → `install_lightrag.sh`). The scripts will: + +1. Install cert-manager (if missing) and the DocumentDB operator into the + `documentdb-operator` namespace via Helm — DocumentDB ships its own + operator and is not a KubeBlocks addon. +2. Apply a single-node `DocumentDB` CR + credentials Secret into `rag`. +3. Read `status.connectionString` from the resulting DocumentDB resource and + wire LightRAG with `LIGHTRAG_KV_STORAGE=MongoKVStorage`, + `LIGHTRAG_GRAPH_STORAGE=MongoGraphStorage`, + `LIGHTRAG_DOC_STATUS_STORAGE=MongoDocStatusStorage`, and + `LIGHTRAG_VECTOR_STORAGE=NanoVectorDBStorage`. 
+ +**Caveats:** + +- **Vectors are not stored in DocumentDB.** DocumentDB does not implement the + MongoDB Atlas `$vectorSearch` operator that `MongoVectorDBStorage` requires, + so embeddings stay on the LightRAG PVC (`/app/data/rag_storage`). KV / graph + / doc-status data **do** live in DocumentDB collections. +- **Kubernetes 1.35+ required** — the DocumentDB operator uses the + `ImageVolume` feature. +- Two non-fatal startup warnings are expected and safe to ignore: + `createIndex.collation is not implemented yet` and + `Pipeline stage name not recognized: $listSearchIndexes`. diff --git a/k8s-deploy/databases/00-config.sh b/k8s-deploy/databases/00-config.sh index 9431a3d308..a292495852 100755 --- a/k8s-deploy/databases/00-config.sh +++ b/k8s-deploy/databases/00-config.sh @@ -19,3 +19,7 @@ ENABLE_QDRANT=false ENABLE_NEO4J=true ENABLE_ELASTICSEARCH=false ENABLE_MONGODB=false +# DocumentDB is deployed via its own operator (not a KubeBlocks addon). +# 01-prepare.sh installs that operator when this flag is true; see: +# https://github.com/documentdb/documentdb-kubernetes-operator +ENABLE_DOCUMENTDB=false diff --git a/k8s-deploy/databases/01-prepare.sh b/k8s-deploy/databases/01-prepare.sh index f43257a4ba..b705cc3262 100755 --- a/k8s-deploy/databases/01-prepare.sh +++ b/k8s-deploy/databases/01-prepare.sh @@ -29,5 +29,28 @@ helm repo update [ "$ENABLE_MONGODB" = true ] && print "Installing MongoDB addon..." && helm upgrade --install kb-addon-mongodb kubeblocks/mongodb --namespace kb-system --version $ADDON_CLUSTER_CHART_VERSION [ "$ENABLE_NEO4J" = true ] && print "Installing Neo4j addon..." && helm upgrade --install kb-addon-neo4j kubeblocks/neo4j --namespace kb-system --version $ADDON_CLUSTER_CHART_VERSION +# DocumentDB ships its own Kubernetes operator (not a KubeBlocks addon). +# Install cert-manager (a hard dependency of the DocumentDB operator) and the +# DocumentDB operator itself. cert-manager is installed only if not already +# present so we don't clobber an existing installation. 
+if [ "$ENABLE_DOCUMENTDB" = true ]; then + if ! helm status cert-manager -n cert-manager &> /dev/null && ! kubectl get deployment cert-manager -n cert-manager &> /dev/null; then + print "Installing cert-manager (DocumentDB operator dependency)..." + helm repo add jetstack https://charts.jetstack.io 2>/dev/null || true + helm repo update jetstack + helm upgrade --install cert-manager jetstack/cert-manager \ + --namespace cert-manager --create-namespace \ + --set crds.enabled=true --wait + else + print "cert-manager already present, skipping." + fi + + print "Installing DocumentDB operator..." + helm repo add documentdb https://documentdb.github.io/documentdb-kubernetes-operator 2>/dev/null || true + helm repo update documentdb + helm upgrade --install documentdb-operator documentdb/documentdb-operator \ + --namespace documentdb-operator --create-namespace --wait +fi + print_success "KubeBlocks database addons installation completed!" print "Now you can run 02-install-database.sh to install database clusters" diff --git a/k8s-deploy/databases/02-install-database.sh b/k8s-deploy/databases/02-install-database.sh index f19a08d77b..326f467f52 100755 --- a/k8s-deploy/databases/02-install-database.sh +++ b/k8s-deploy/databases/02-install-database.sh @@ -15,6 +15,8 @@ print "Installing database clusters..." [ "$ENABLE_QDRANT" = true ] && print "Installing Qdrant cluster..." && helm upgrade --install qdrant-cluster kubeblocks/qdrant-cluster -f "$DATABASE_SCRIPT_DIR/qdrant/values.yaml" --namespace $NAMESPACE --version $ADDON_CLUSTER_CHART_VERSION [ "$ENABLE_MONGODB" = true ] && print "Installing MongoDB cluster..." && helm upgrade --install mongodb-cluster kubeblocks/mongodb-cluster -f "$DATABASE_SCRIPT_DIR/mongodb/values.yaml" --namespace $NAMESPACE --version $ADDON_CLUSTER_CHART_VERSION [ "$ENABLE_NEO4J" = true ] && print "Installing Neo4j cluster..." 
&& helm upgrade --install neo4j-cluster kubeblocks/neo4j-cluster -f "$DATABASE_SCRIPT_DIR/neo4j/values.yaml" --namespace $NAMESPACE --version $ADDON_CLUSTER_CHART_VERSION +# DocumentDB has its own operator; apply the CR directly instead of via Helm. +[ "$ENABLE_DOCUMENTDB" = true ] && print "Installing DocumentDB cluster..." && kubectl apply -f "$DATABASE_SCRIPT_DIR/documentdb/values.yaml" --namespace $NAMESPACE # Wait for databases to be ready print "Waiting for databases to be ready..." @@ -38,6 +40,7 @@ while true; do [ "$ENABLE_QDRANT" = true ] && WAIT_CONDITIONS+=("kubectl wait --for=condition=ready pods -l app.kubernetes.io/instance=qdrant-cluster -n $NAMESPACE --timeout=10s") [ "$ENABLE_MONGODB" = true ] && WAIT_CONDITIONS+=("kubectl wait --for=condition=ready pods -l app.kubernetes.io/instance=mongodb-cluster -n $NAMESPACE --timeout=10s") [ "$ENABLE_NEO4J" = true ] && WAIT_CONDITIONS+=("kubectl wait --for=condition=ready pods -l app.kubernetes.io/instance=neo4j-cluster -n $NAMESPACE --timeout=10s") + [ "$ENABLE_DOCUMENTDB" = true ] && WAIT_CONDITIONS+=("kubectl wait --for=jsonpath='{.status.status}'='Cluster in healthy state' documentdb/documentdb-cluster -n $NAMESPACE --timeout=10s") # Check if all enabled databases are ready ALL_READY=true diff --git a/k8s-deploy/databases/03-uninstall-database.sh b/k8s-deploy/databases/03-uninstall-database.sh index be0503c561..a6514fcf85 100755 --- a/k8s-deploy/databases/03-uninstall-database.sh +++ b/k8s-deploy/databases/03-uninstall-database.sh @@ -15,6 +15,7 @@ print "Uninstalling database clusters..." [ "$ENABLE_QDRANT" = true ] && print "Uninstalling Qdrant cluster..." && helm uninstall qdrant-cluster --namespace $NAMESPACE 2>/dev/null || true [ "$ENABLE_MONGODB" = true ] && print "Uninstalling MongoDB cluster..." && helm uninstall mongodb-cluster --namespace $NAMESPACE 2>/dev/null || true [ "$ENABLE_NEO4J" = true ] && print "Uninstalling Neo4j cluster..." 
&& helm uninstall neo4j-cluster --namespace $NAMESPACE 2>/dev/null || true +[ "$ENABLE_DOCUMENTDB" = true ] && print "Uninstalling DocumentDB cluster..." && kubectl delete -f "$DATABASE_SCRIPT_DIR/documentdb/values.yaml" --namespace $NAMESPACE 2>/dev/null || true print_success "Database clusters uninstalled" print "To uninstall database addons and KubeBlocks, run 04-cleanup.sh" diff --git a/k8s-deploy/databases/04-cleanup.sh b/k8s-deploy/databases/04-cleanup.sh index 12493501e2..37ea1cab8c 100755 --- a/k8s-deploy/databases/04-cleanup.sh +++ b/k8s-deploy/databases/04-cleanup.sh @@ -16,6 +16,14 @@ print "Uninstalling KubeBlocks database addons..." [ "$ENABLE_MONGODB" = true ] && print "Uninstalling MongoDB addon..." && helm uninstall kb-addon-mongodb --namespace kb-system 2>/dev/null || true [ "$ENABLE_NEO4J" = true ] && print "Uninstalling Neo4j addon..." && helm uninstall kb-addon-neo4j --namespace kb-system 2>/dev/null || true +# DocumentDB operator (installed by 01-prepare.sh when ENABLE_DOCUMENTDB=true). +# We leave cert-manager in place because it may be used by other workloads. +if [ "$ENABLE_DOCUMENTDB" = true ]; then + print "Uninstalling DocumentDB operator..." + helm uninstall documentdb-operator --namespace documentdb-operator 2>/dev/null || true + kubectl delete namespace documentdb-operator 2>/dev/null || true +fi + print_success "Database addons uninstallation completed!" source "$DATABASE_SCRIPT_DIR/uninstall-kubeblocks.sh" diff --git a/k8s-deploy/databases/README.md b/k8s-deploy/databases/README.md index 1e0002d07e..8e942aba82 100644 --- a/k8s-deploy/databases/README.md +++ b/k8s-deploy/databases/README.md @@ -51,6 +51,16 @@ Make sure the following tools are installed and configured: 3. **(Optional) Modify database settings** Before deployment you can edit the `values.yaml` file inside each `/` directory to change `version`, `replicas`, `CPU`, `memory`, `storage size`, etc. 
+ > **DocumentDB note:** DocumentDB ships its own Kubernetes operator and is + > not a KubeBlocks addon. When `ENABLE_DOCUMENTDB=true`, + > `01-prepare.sh` automatically installs cert-manager (if missing) and the + > [DocumentDB operator](https://github.com/documentdb/documentdb-kubernetes-operator) + > into the `documentdb-operator` namespace, and `02-install-database.sh` + > applies the `DocumentDB` CR + credentials Secret from + > `documentdb/values.yaml` into the `rag` namespace. Replace the placeholder + > password in `documentdb/values.yaml` before applying. Requires + > Kubernetes 1.35+ (the operator uses the `ImageVolume` feature). + 4. **Install the database clusters** ```bash diff --git a/k8s-deploy/databases/documentdb/values.yaml b/k8s-deploy/databases/documentdb/values.yaml new file mode 100644 index 0000000000..f6690d6574 --- /dev/null +++ b/k8s-deploy/databases/documentdb/values.yaml @@ -0,0 +1,40 @@ +## description: DocumentDB instance for LightRAG (MongoDB-compatible storage). +## +## DocumentDB is not a KubeBlocks addon — it ships its own Kubernetes +## operator (https://github.com/documentdb/documentdb-kubernetes-operator). +## When ENABLE_DOCUMENTDB=true, 01-prepare.sh installs the operator (and +## cert-manager if missing); 02-install-database.sh then applies this manifest +## via kubectl. install_lightrag.sh reads status.connectionString from the +## resulting DocumentDB resource and wires it into LightRAG. +## +## Notes: +## - LightRAG requires DocumentDB extension v0.110.0+ for the `_id` lookup +## fix (https://github.com/documentdb/documentdb/pull/459). +## - Replace the password before using outside a throwaway dev cluster. +## - Requires Kubernetes 1.35+ (operator uses ImageVolume feature). 
+--- +apiVersion: v1 +kind: Secret +metadata: + name: docdb-credentials +type: Opaque +stringData: + username: docdbadmin + password: "ChangeMe!ReplaceBeforeUsing" +--- +apiVersion: documentdb.io/preview +kind: DocumentDB +metadata: + name: documentdb-cluster +spec: + nodeCount: 1 + instancesPerNode: 1 + documentDBImage: ghcr.io/documentdb/documentdb-kubernetes-operator/documentdb:0.110.0 + gatewayImage: ghcr.io/documentdb/documentdb-kubernetes-operator/gateway:0.110.0 + documentDbCredentialSecret: docdb-credentials + resource: + storage: + pvcSize: "10Gi" + exposeViaService: + serviceType: ClusterIP + sidecarInjectorPluginName: cnpg-i-sidecar-injector.documentdb.io diff --git a/k8s-deploy/install_lightrag.sh b/k8s-deploy/install_lightrag.sh index 763521e7de..6486994c74 100755 --- a/k8s-deploy/install_lightrag.sh +++ b/k8s-deploy/install_lightrag.sh @@ -4,6 +4,11 @@ NAMESPACE=rag SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" +# Load enabled-database flags (ENABLE_POSTGRESQL, ENABLE_NEO4J, +# ENABLE_DOCUMENTDB, ...) so we only resolve credentials for engines the user +# actually deployed. +source "$SCRIPT_DIR/databases/00-config.sh" + if [ -z "$OPENAI_API_KEY" ]; then echo "OPENAI_API_KEY environment variable is not set" read -s -p "Enter your OpenAI API key: " OPENAI_API_KEY @@ -27,30 +32,89 @@ bash "$SCRIPT_DIR/databases/01-prepare.sh" bash "$SCRIPT_DIR/databases/02-install-database.sh" # Create vector extension in PostgreSQL if enabled -print "Waiting for PostgreSQL pods to be ready..." -if kubectl wait --for=condition=ready pods -l kubeblocks.io/role=primary,app.kubernetes.io/instance=pg-cluster -n $NAMESPACE --timeout=300s; then - print "Creating vector extension in PostgreSQL..." - kubectl exec -it $(kubectl get pods -l kubeblocks.io/role=primary,app.kubernetes.io/instance=pg-cluster -n $NAMESPACE -o name) -n $NAMESPACE -- psql -c "CREATE EXTENSION vector;" - print_success "Vector extension created successfully." 
-else - print "Warning: PostgreSQL pods not ready within timeout. Vector extension not created." +if [ "$ENABLE_POSTGRESQL" = true ]; then + print "Waiting for PostgreSQL pods to be ready..." + if kubectl wait --for=condition=ready pods -l kubeblocks.io/role=primary,app.kubernetes.io/instance=pg-cluster -n $NAMESPACE --timeout=300s; then + print "Creating vector extension in PostgreSQL..." + kubectl exec -it $(kubectl get pods -l kubeblocks.io/role=primary,app.kubernetes.io/instance=pg-cluster -n $NAMESPACE -o name) -n $NAMESPACE -- psql -c "CREATE EXTENSION vector;" + print_success "Vector extension created successfully." + else + print "Warning: PostgreSQL pods not ready within timeout. Vector extension not created." + fi fi # Get database passwords from Kubernetes secrets echo "Retrieving database credentials from Kubernetes secrets..." -POSTGRES_PASSWORD=$(kubectl get secrets -n rag pg-cluster-postgresql-account-postgres -o jsonpath='{.data.password}' | base64 -d) -if [ -z "$POSTGRES_PASSWORD" ]; then - echo "Error: Could not retrieve PostgreSQL password. Make sure PostgreSQL is deployed and the secret exists." - exit 1 + +HELM_OVERRIDES=() + +if [ "$ENABLE_POSTGRESQL" = true ]; then + POSTGRES_PASSWORD=$(kubectl get secrets -n rag pg-cluster-postgresql-account-postgres -o jsonpath='{.data.password}' | base64 -d) + if [ -z "$POSTGRES_PASSWORD" ]; then + echo "Error: Could not retrieve PostgreSQL password. Make sure PostgreSQL is deployed and the secret exists." + exit 1 + fi + export POSTGRES_PASSWORD=$POSTGRES_PASSWORD + HELM_OVERRIDES+=(--set-string "env.POSTGRES_PASSWORD=$POSTGRES_PASSWORD") fi -export POSTGRES_PASSWORD=$POSTGRES_PASSWORD -NEO4J_PASSWORD=$(kubectl get secrets -n rag neo4j-cluster-neo4j-account-neo4j -o jsonpath='{.data.password}' | base64 -d) -if [ -z "$NEO4J_PASSWORD" ]; then - echo "Error: Could not retrieve Neo4J password. Make sure Neo4J is deployed and the secret exists." 
- exit 1 +if [ "$ENABLE_NEO4J" = true ]; then + NEO4J_PASSWORD=$(kubectl get secrets -n rag neo4j-cluster-neo4j-account-neo4j -o jsonpath='{.data.password}' | base64 -d) + if [ -z "$NEO4J_PASSWORD" ]; then + echo "Error: Could not retrieve Neo4J password. Make sure Neo4J is deployed and the secret exists." + exit 1 + fi + export NEO4J_PASSWORD=$NEO4J_PASSWORD + HELM_OVERRIDES+=(--set-string "env.NEO4J_PASSWORD=$NEO4J_PASSWORD") +fi + +if [ "$ENABLE_DOCUMENTDB" = true ]; then + echo "Waiting for DocumentDB cluster to become healthy..." + kubectl wait --for=jsonpath='{.status.status}'="Cluster in healthy state" \ + documentdb/documentdb-cluster -n $NAMESPACE --timeout=300s || { + echo "Error: DocumentDB cluster did not reach healthy state." + exit 1 + } + + # The operator publishes a connection string on the DocumentDB resource + # status. It contains embedded $(kubectl get secret ...) substitutions, so + # `eval` resolves the credentials. Trust this field only because we created + # the DocumentDB resource ourselves above. + RAW_CONN=$(kubectl get documentdb documentdb-cluster -n $NAMESPACE \ + -o jsonpath='{.status.connectionString}') + if [ -z "$RAW_CONN" ]; then + echo "Error: DocumentDB status.connectionString is empty." + exit 1 + fi + MONGO_URI=$(eval "echo \"$RAW_CONN\"") + + # Replace the gateway ClusterIP with its in-cluster DNS name so the URI + # remains valid across pod restarts. + SVC_IP=$(kubectl get svc "documentdb-service-documentdb-cluster" -n $NAMESPACE -o jsonpath='{.spec.clusterIP}' 2>/dev/null) || true + if [ -n "$SVC_IP" ]; then + SVC_DNS="documentdb-service-documentdb-cluster.${NAMESPACE}.svc.cluster.local" + MONGO_URI=${MONGO_URI//"$SVC_IP"/$SVC_DNS} + fi + + # The connection string sets both directConnection=true and replicaSet=rs0. + # pymongo treats these as conflicting (the gateway advertises itself as + # standalone, not as a member of "rs0"), so strip replicaSet for direct + # gateway connections. 
+ MONGO_URI=$(echo "$MONGO_URI" | sed -E -e 's/([?&])replicaSet=[^&]*&/\1/g' -e 's/[?&]replicaSet=[^&]*$//') + export MONGO_URI + + # Switch storage backends to DocumentDB-backed Mongo* implementations. + # NanoVectorDBStorage stays on the local PVC because DocumentDB lacks the + # MongoDB Atlas $vectorSearch operator that MongoVectorDBStorage requires. + HELM_OVERRIDES+=( + --set-string "env.MONGO_URI=$MONGO_URI" + --set-string "env.MONGO_DATABASE=lightrag" + --set-string "env.LIGHTRAG_KV_STORAGE=MongoKVStorage" + --set-string "env.LIGHTRAG_GRAPH_STORAGE=MongoGraphStorage" + --set-string "env.LIGHTRAG_DOC_STATUS_STORAGE=MongoDocStatusStorage" + --set-string "env.LIGHTRAG_VECTOR_STORAGE=NanoVectorDBStorage" + ) fi -export NEO4J_PASSWORD=$NEO4J_PASSWORD #REDIS_PASSWORD=$(kubectl get secrets -n rag redis-cluster-redis-account-default -o jsonpath='{.data.password}' | base64 -d) #if [ -z "$REDIS_PASSWORD" ]; then @@ -68,8 +132,7 @@ fi helm upgrade --install lightrag $SCRIPT_DIR/lightrag \ --namespace $NAMESPACE \ - --set-string env.POSTGRES_PASSWORD=$POSTGRES_PASSWORD \ - --set-string env.NEO4J_PASSWORD=$NEO4J_PASSWORD \ + "${HELM_OVERRIDES[@]}" \ --set-string env.LLM_BINDING=openai \ --set-string env.LLM_MODEL=gpt-4o-mini \ --set-string env.LLM_BINDING_HOST=$OPENAI_API_BASE \ diff --git a/k8s-deploy/lightrag/values.yaml b/k8s-deploy/lightrag/values.yaml index d8057d1e1d..02661c79d8 100644 --- a/k8s-deploy/lightrag/values.yaml +++ b/k8s-deploy/lightrag/values.yaml @@ -80,3 +80,14 @@ env: # Replace with your Qdrant credentials QDRANT_URL: http://qdrant-cluster-qdrant-qdrant:6333 # REDIS_URI: redis://default:${REDIS_PASSWORD}@redis-cluster-redis-redis:6379 + # DocumentDB (MongoDB-compatible). 
When ENABLE_DOCUMENTDB=true, + # install_lightrag.sh resolves MONGO_URI from the DocumentDB resource's + # status.connectionString and overrides the storage backends below to: + # LIGHTRAG_KV_STORAGE: MongoKVStorage + # LIGHTRAG_GRAPH_STORAGE: MongoGraphStorage + # LIGHTRAG_DOC_STATUS_STORAGE: MongoDocStatusStorage + # LIGHTRAG_VECTOR_STORAGE: NanoVectorDBStorage + # NanoVectorDBStorage is used because DocumentDB does not implement the + # MongoDB Atlas $vectorSearch operator that MongoVectorDBStorage requires. + # MONGO_URI: + MONGO_DATABASE: lightrag