Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
import { WeaviateStore } from '@langchain/weaviate'
import { VectorStoreRetriever, VectorStoreRetrieverInput } from '@langchain/core/vectorstores'
import { Document } from '@langchain/core/documents'
import { get } from 'lodash'

/**
 * Constructor options for the hybrid-search retriever: the standard
 * vector-store retriever input without `k`, plus the hybrid-search settings below.
 */
type WeaviateHybridInput<V extends WeaviateStore> = Omit<VectorStoreRetrieverInput<V>, 'k'> & {
// Hybrid weighting forwarded as the `alpha` search option.
alpha: number
// Maximum number of results; forwarded as the `limit` search option.
topK: number
// Optional template for returned documents; supports `{{context}}` and `{{metadata.<path>}}` placeholders.
resultFormat?: string
// Strategy used to merge keyword and vector results.
fusionType?: 'RankedFusion' | 'RelativeScoreFusion'
}

/**
 * Retriever that queries a Weaviate vector store through its hybrid search
 * (`vectorStore.hybridSearch`) and optionally rewrites each hit's
 * `pageContent` using a result template.
 */
export class HybridSearchRetriever<V extends WeaviateStore> extends VectorStoreRetriever<V> {
    /** Optional output template; when unset or empty, documents are returned unchanged. */
    resultFormat?: string
    /** Hybrid weighting forwarded as the `alpha` search option. */
    alpha: number
    /** Maximum number of results, forwarded as the `limit` search option. */
    topK: number
    /** Fusion strategy name as configured by the caller ('RankedFusion' or 'RelativeScoreFusion'). */
    fusionType: string

    /**
     * @param input Retriever options; `fusionType` defaults to 'RankedFusion' when omitted.
     */
    constructor(input: WeaviateHybridInput<V>) {
        super(input)
        this.vectorStore = input.vectorStore
        this.alpha = input.alpha
        this.topK = input.topK
        // Previously never copied from the input, which left the formatting branch unreachable.
        this.resultFormat = input.resultFormat
        // Previously read the still-uninitialised `this.fusionType` instead of the input value.
        this.fusionType = input.fusionType ?? 'RankedFusion'
    }

    /**
     * Runs the hybrid search for `query` and returns the matching documents,
     * formatted with `resultFormat` when one is configured.
     *
     * @param query Text to search for.
     * @returns The retrieved documents (metadata is always passed through untouched).
     */
    async _getRelevantDocuments(query: string): Promise<Document[]> {
        const results = await this.vectorStore.hybridSearch(query, {
            limit: this.topK,
            alpha: this.alpha,
            // NOTE(review): weaviate-client v3 hybrid options appear to spell the strategies
            // 'Ranked' / 'RelativeScore', so the configured name is translated here — confirm
            // against the installed client version.
            fusionType: this.fusionType === 'RelativeScoreFusion' ? 'RelativeScore' : 'Ranked',
            filters: this.filter
        })

        const template = this.resultFormat
        if (!template) {
            return results
        }

        return results.map((doc) => {
            // A replacer function keeps `$&`, `$1`, `$$` ... inside the document text literal;
            // a plain replacement string would interpret them as substitution patterns.
            const withContext = template.replace(/{{context}}/g, () => doc.pageContent)
            return new Document({
                pageContent: replaceMetadata(withContext, doc.metadata),
                metadata: doc.metadata
            })
        })
    }

    /**
     * Convenience factory: builds a retriever for `vectorStore` with the given options.
     */
    static fromVectorStore<V extends WeaviateStore>(vectorStore: V, options: Omit<WeaviateHybridInput<V>, 'vectorStore'>) {
        return new this<V>({ ...options, vectorStore })
    }
}

/**
 * Fills in `{{metadata.<path>}}` placeholders in `template`.
 *
 * Each placeholder's path is looked up in `metadata` (dotted paths are
 * resolved by lodash `get`). A placeholder whose lookup yields `undefined`
 * is left in the output verbatim; any other value is inserted via `String()`.
 *
 * @param template Text containing zero or more placeholders.
 * @param metadata Object the placeholder paths are resolved against.
 * @returns The template with resolvable placeholders substituted.
 */
function replaceMetadata(template: string, metadata: Record<string, any>): string {
    const substitute = (placeholder: string, keyPath: string): string => {
        const resolved = get(metadata, keyPath)
        if (resolved === undefined) {
            return placeholder
        }
        return String(resolved)
    }

    return template.replace(/{{metadata\.([\w.]+)}}/g, substitute)
}

/**
 * Joins already-built filters under a logical operator.
 *
 * A single filter is returned as-is. If the first filter exposes a chainable
 * `and`/`or` method it is used; otherwise the filters are wrapped in a plain
 * `{ operator, filters, value: null }` object.
 * NOTE(review): the plain-object shape is what weaviate-client v3's
 * `Filters.and` / `Filters.or` are understood to return — confirm against the
 * installed client version.
 */
const combineWeaviateFilters = (operator: 'And' | 'Or', filters: any[]): any => {
    if (filters.length === 1) return filters[0]

    const method = operator === 'And' ? 'and' : 'or'
    if (typeof filters[0]?.[method] === 'function') {
        return filters.reduce((acc: any, next: any) => acc[method](next))
    }

    return { operator, filters, value: null }
}

/**
 * Converts a `where`-style filter description into a filter built with the
 * given Weaviate client.
 *
 * Supported shapes:
 * - `{ operator: 'And' | 'Or', operands: [...] }` — operands are converted
 *   recursively; operands that cannot be converted are dropped.
 * - `{ path, operator, value* }` — a single property comparison. Only the
 *   first element of an array `path` is used.
 * The description may be given directly or nested under a `where` key.
 *
 * @param filterInput Filter description (or an object wrapping it in `where`).
 * @param client Weaviate client exposing `collections.get(name).filter.byProperty(prop)`.
 * @param indexName Name of the collection the filter is built for.
 * @returns The built filter, or `undefined` when the input is empty,
 *          has no usable operands, or uses an unsupported operator.
 */
export const processSearchFilter = (filterInput: any, client: any, indexName: string): any => {
    if (!filterInput) return undefined
    const rawFilter = filterInput?.where ?? filterInput

    if (rawFilter.operator === 'And' || rawFilter.operator === 'Or') {
        const operands: any[] = Array.isArray(rawFilter.operands) ? rawFilter.operands : []
        const subFilters = operands.map((operand: any) => processSearchFilter(operand, client, indexName)).filter(Boolean)

        if (!subFilters.length) return undefined

        return combineWeaviateFilters(rawFilter.operator, subFilters)
    }

    if (rawFilter?.path && rawFilter?.operator) {
        const propName = Array.isArray(rawFilter.path) ? rawFilter.path[0] : rawFilter.path
        // `??` (not `||`) so that legitimate falsy values such as 0, '' and false are kept.
        const propValue =
            rawFilter.valueText ??
            rawFilter.valueString ??
            rawFilter.valueInt ??
            rawFilter.valueNumber ??
            rawFilter.valueBoolean ??
            rawFilter.valueDate ??
            rawFilter.valueTextArray ??
            rawFilter.valueStringArray ??
            rawFilter.valueIntArray ??
            rawFilter.valueNumberArray ??
            rawFilter.valueBooleanArray ??
            rawFilter.valueDateArray

        const filter = client.collections.get(indexName).filter.byProperty(propName)

        // A switch (rather than an object lookup) so that operator names such as
        // 'constructor' cannot resolve to inherited Object.prototype members.
        switch (rawFilter.operator) {
            case 'Equal':
                return filter.equal(propValue)
            case 'NotEqual':
                return filter.notEqual(propValue)
            case 'GreaterThan':
                return filter.greaterThan(propValue)
            case 'GreaterThanEqual':
                return filter.greaterOrEqual(propValue)
            case 'LessThan':
                return filter.lessThan(propValue)
            case 'LessThanEqual':
                return filter.lessOrEqual(propValue)
            case 'Like':
                return filter.like(propValue)
            case 'ContainsAny':
                return filter.containsAny(propValue)
            case 'ContainsAll':
                return filter.containsAll(propValue)
            default:
                return undefined
        }
    }

    return undefined
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
import { WeaviateStore } from '@langchain/weaviate'
import { INode, INodeData, INodeParams, INodeOutputsValue } from '../../../src/Interface'
import { handleEscapeCharacters } from '../../../src'
import { HybridSearchRetriever } from './HybridSearchRetriever'

// Default value of the node's "Result Format" input: the document text followed by a "Source:" line.
const defaultReturnFormat = '{{context}}\nSource: {{metadata.source}}'

/**
 * Node definition for a Weaviate hybrid-search retriever.
 *
 * Declares the node's inputs/outputs and, in `init`, builds a
 * `HybridSearchRetriever` from the connected vector store. Depending on the
 * selected output it returns the retriever itself, the retrieved documents,
 * or their concatenated text.
 */
class WeaviateRetriever_Retrievers implements INode {
    label: string
    name: string
    version: number
    description: string
    type: string
    icon: string
    category: string
    baseClasses: string[]
    inputs: INodeParams[]
    outputs: INodeOutputsValue[]

    constructor() {
        this.label = 'Weaviate Retriever'
        this.name = 'weaviateRetriever'
        this.version = 1.0
        this.type = 'WeaviateRetriever'
        this.icon = 'weaviateRetriever.png'
        this.category = 'Retrievers'
        this.description = 'Weaviate hybrid search combining vector similarity and BM25 keyword search'
        this.baseClasses = [this.type, 'BaseRetriever']
        this.inputs = [
            {
                label: 'Weaviate Vector Store',
                name: 'vectorStore',
                type: 'VectorStore'
            },
            {
                label: 'Query',
                name: 'query',
                type: 'string',
                description: 'Query to retrieve documents from retriever. If not specified, user question will be used',
                optional: true,
                acceptVariable: true
            },
            {
                label: 'Result Format',
                name: 'resultFormat',
                type: 'string',
                rows: 4,
                description:
                    'Format to return the results in. Use {{context}} to insert the pageContent of the document and {{metadata.key}} to insert metadata values.',
                default: defaultReturnFormat
            },
            {
                label: 'Alpha',
                name: 'alpha',
                type: 'number',
                description:
                    'Number between 0 and 1 that determines the weighting of keyword (BM25) portion of the hybrid search. A value of 1 is a pure vector search, while 0 is a pure keyword search.',
                default: 0.5,
                step: 0.1,
                optional: true
            },
            {
                label: 'Top K',
                name: 'topK',
                description: 'Number of top results to fetch. Default to vector store topK',
                placeholder: '4',
                type: 'number',
                optional: true
            },
            {
                label: 'fusionType',
                name: 'fusionType',
                type: 'options',
                default: 'RankedFusion',
                description:
                    "Method to merge results: 'RankedFusion' combines by document rank, while 'RelativeScoreFusion' combines by normalized scores.",
                options: [
                    {
                        label: 'RankedFusion',
                        name: 'RankedFusion'
                    },
                    {
                        label: 'RelativeScoreFusion',
                        name: 'RelativeScoreFusion'
                    }
                ],
                optional: true
            }
        ]
        this.outputs = [
            {
                label: 'Weaviate Retriever',
                name: 'retriever',
                baseClasses: this.baseClasses
            },
            {
                label: 'Document',
                name: 'document',
                description: 'Array of document objects containing metadata and pageContent',
                baseClasses: ['Document', 'json']
            },
            {
                label: 'Text',
                name: 'text',
                description: 'Concatenated string from pageContent of documents',
                baseClasses: ['string', 'json']
            }
        ]
    }

    /**
     * Builds the retriever from the node inputs and produces the selected output.
     *
     * @param nodeData Node configuration; reads `inputs.vectorStore`, `inputs.query`,
     *        `inputs.topK`, `inputs.alpha`, `inputs.resultFormat`, `inputs.fusionType`
     *        and `outputs.output`.
     * @param input Fallback search text used when no explicit query is configured.
     * @returns The retriever (output `retriever` or anything unrecognised), the
     *          retrieved documents (`document`), or their joined text (`text`).
     */
    async init(nodeData: INodeData, input: string): Promise<any> {
        const vectorStore = nodeData.inputs?.vectorStore as WeaviateStore
        const query = nodeData.inputs?.query as string
        const resultFormat = nodeData.inputs?.resultFormat as string
        // Previously never read, so the "fusionType" option had no effect.
        const fusionType = nodeData.inputs?.fusionType as 'RankedFusion' | 'RelativeScoreFusion' | undefined
        const output = nodeData.outputs?.output as string

        // Parse via String() so both numeric and string inputs work; a truthiness
        // check would discard a numeric alpha of 0 (pure keyword search).
        const parsedAlpha = Number.parseFloat(String(nodeData.inputs?.alpha))
        const alpha = Number.isNaN(parsedAlpha) ? 0.5 : parsedAlpha

        const parsedTopK = Number.parseInt(String(nodeData.inputs?.topK), 10)
        // The Top K input is documented as defaulting to the vector store's topK.
        // NOTE(review): assumes the upstream vector store node exposes it as `k` — confirm.
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        const storeTopK = Number((vectorStore as any)?.k)
        const fallbackTopK = Number.isInteger(storeTopK) && storeTopK > 0 ? storeTopK : 4
        const topK = parsedTopK > 0 ? parsedTopK : fallbackTopK

        const retriever = HybridSearchRetriever.fromVectorStore(vectorStore, {
            resultFormat,
            alpha,
            topK,
            fusionType
        })

        // An explicitly configured query takes precedence over the incoming user input.
        const searchText = query ? query : input

        if (output === 'retriever') return retriever
        else if (output === 'document') return await retriever._getRelevantDocuments(searchText)
        else if (output === 'text') {
            const docs = await retriever._getRelevantDocuments(searchText)
            const finaltext = docs.map((doc) => doc.pageContent).join('\n')
            return handleEscapeCharacters(finaltext, false)
        }

        return retriever
    }
}

// Expose the node class to the loader under the `nodeClass` key.
module.exports = { nodeClass: WeaviateRetriever_Retrievers }
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
13 changes: 12 additions & 1 deletion packages/components/nodes/tools/RetrieverTool/RetrieverTool.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import { getBaseClasses, resolveFlowObjValue, parseWithTypeConversion } from '..
import { SOURCE_DOCUMENTS_PREFIX } from '../../../src/agents'
import { RunnableConfig } from '@langchain/core/runnables'
import { VectorStoreRetriever } from '@langchain/core/vectorstores'
import { processSearchFilter } from '../../retrievers/WeaviateRetriever/HybridSearchRetriever'

const howToUse = `Add additional filters to vector store. You can also filter with flow config, including the current "state":
- \`$flow.sessionId\`
Expand Down Expand Up @@ -203,7 +204,17 @@ class Retriever_Tools implements INode {

if (newMetadataFilter && typeof newMetadataFilter === 'object' && Object.keys(newMetadataFilter).length > 0) {
const vectorStore = (retriever as VectorStoreRetriever<any>).vectorStore
vectorStore.filter = newMetadataFilter
if (vectorStore.constructor.name === 'WeaviateStore' || vectorStore.lc_namespace?.includes('weaviate')) {
const client = (vectorStore as any).client
const indexName = (vectorStore as any).indexName
if (client && indexName) {
const newWeaviateMetadataFilter = processSearchFilter(newMetadataFilter, client, indexName)
const weaviateRetriever = retriever as VectorStoreRetriever
weaviateRetriever.filter = newWeaviateMetadataFilter
}
} else {
vectorStore.filter = newMetadataFilter
}
}
}
const docs = await retriever.invoke(input)
Expand Down
Loading