Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions packages/components/nodes/documentloaders/File/File.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import { LoadOfSheet } from '../MicrosoftExcel/ExcelLoader'
import { PowerpointLoader } from '../MicrosoftPowerpoint/PowerpointLoader'
import { Document } from '@langchain/core/documents'
import { getFileFromStorage } from '../../../src/storageUtils'
import { handleEscapeCharacters, mapMimeTypeToExt } from '../../../src/utils'
import { handleEscapeCharacters, loadLegacyPdfJs, mapMimeTypeToExt } from '../../../src/utils'

class File_DocumentLoaders implements INode {
label: string
Expand Down Expand Up @@ -236,13 +236,13 @@ class File_DocumentLoaders implements INode {
splitPages: false,
pdfjs: () =>
// @ts-ignore
legacyBuild ? import('pdfjs-dist/legacy/build/pdf.js') : import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js')
legacyBuild ? loadLegacyPdfJs() : import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js')
})
: // @ts-ignore
new PDFLoader(blob, {
pdfjs: () =>
// @ts-ignore
legacyBuild ? import('pdfjs-dist/legacy/build/pdf.js') : import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js')
legacyBuild ? loadLegacyPdfJs() : import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js')
}),
'': (blob) => new TextLoader(blob)
})
Expand Down
6 changes: 3 additions & 3 deletions packages/components/nodes/documentloaders/Pdf/Pdf.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { omit } from 'lodash'
import { IDocument, ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
import { TextSplitter } from '@langchain/textsplitters'
import { PDFLoader } from '@langchain/community/document_loaders/fs/pdf'
import { getFileFromStorage, handleEscapeCharacters, INodeOutputsValue } from '../../../src'
import { getFileFromStorage, handleEscapeCharacters, INodeOutputsValue, loadLegacyPdfJs } from '../../../src'

class Pdf_DocumentLoaders implements INode {
label: string
Expand Down Expand Up @@ -196,7 +196,7 @@ class Pdf_DocumentLoaders implements INode {
splitPages: false,
pdfjs: () =>
// @ts-ignore
legacyBuild ? import('pdfjs-dist/legacy/build/pdf.js') : import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js')
legacyBuild ? loadLegacyPdfJs() : import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js')
})
if (textSplitter) {
let splittedDocs = await loader.load()
Expand All @@ -209,7 +209,7 @@ class Pdf_DocumentLoaders implements INode {
const loader = new PDFLoader(new Blob([new Uint8Array(bf)]), {
pdfjs: () =>
// @ts-ignore
legacyBuild ? import('pdfjs-dist/legacy/build/pdf.js') : import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js')
legacyBuild ? loadLegacyPdfJs() : import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js')
})
if (textSplitter) {
let splittedDocs = await loader.load()
Expand Down
3 changes: 2 additions & 1 deletion packages/components/nodes/tools/Arxiv/core.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { z } from 'zod/v3'
import fetch from 'node-fetch'
import { PDFLoader } from '@langchain/community/document_loaders/fs/pdf'
import { DynamicStructuredTool } from '../OpenAPIToolkit/core'
import { loadLegacyPdfJs } from '../../../src/utils'

export const desc = `Use this tool to search for academic papers on Arxiv. You can search by keywords, topics, authors, or specific Arxiv IDs. The tool can return either paper summaries or download and extract full paper content.`

Expand Down Expand Up @@ -184,7 +185,7 @@ export class ArxivTool extends DynamicStructuredTool {
splitPages: false,
pdfjs: () =>
// @ts-ignore
this.legacyBuild ? import('pdfjs-dist/legacy/build/pdf.js') : import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js')
this.legacyBuild ? loadLegacyPdfJs() : import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js')
})

const docs = await loader.load()
Expand Down
22 changes: 21 additions & 1 deletion packages/components/src/utils.test.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
import { removeInvalidImageMarkdown, convertRequireToImport, COMMONJS_REQUIRE_REGEX, IMPORT_EXTRACTION_REGEX } from './utils'
import {
removeInvalidImageMarkdown,
convertRequireToImport,
COMMONJS_REQUIRE_REGEX,
IMPORT_EXTRACTION_REGEX,
loadLegacyPdfJs
} from './utils'

describe('removeInvalidImageMarkdown', () => {
describe('strips non-http/https image markdown', () => {
Expand Down Expand Up @@ -229,3 +235,17 @@ describe('Import extraction regex (utils.ts line 1596 pattern)', () => {
expect(extractModules('console.log("hello")')).toEqual([])
})
})

describe('loadLegacyPdfJs', () => {
    it('loads pdfjs-dist legacy pdf.mjs through a native file URL import', async () => {
        // Stand-in for the module object the injected importer resolves with.
        const fakeModule = { getDocument: jest.fn(), version: '5.3.93' }
        // Injected collaborators: the resolver maps the specifier to a path on disk,
        // the importer receives the file URL derived from that path.
        const resolveStub = jest.fn().mockReturnValue('/tmp/pdfjs-dist/legacy/build/pdf.mjs')
        const importStub = jest.fn().mockResolvedValue(fakeModule)

        const result = await loadLegacyPdfJs(importStub as any, resolveStub as any)

        // The legacy ESM entry point must be resolved by its package specifier...
        expect(resolveStub).toHaveBeenCalledWith('pdfjs-dist/legacy/build/pdf.mjs')
        // ...and imported via a file:// URL rather than a bare filesystem path.
        expect(importStub).toHaveBeenCalledWith('file:///tmp/pdfjs-dist/legacy/build/pdf.mjs')
        expect(result).toEqual({ getDocument: fakeModule.getDocument, version: '5.3.93' })
    })
})
21 changes: 21 additions & 0 deletions packages/components/src/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import { JSDOM } from 'jsdom'
import JSON5 from 'json5'
import { cloneDeep, get, omit } from 'lodash'
import * as path from 'path'
import { pathToFileURL } from 'url'
import TurndownService from 'turndown'
import { DataSource, Equal } from 'typeorm'
import { NodeVM } from 'vm2'
Expand All @@ -28,6 +29,26 @@ export const numberOrExpressionRegex = '^(\\d+\\.?\\d*|{{.*}})$' //return true i
export const notEmptyRegex = '(.|\\s)*\\S(.|\\s)*' //return true if string is not empty or blank
export const FLOWISE_CHATID = 'flowise_chatId'

/** Loads a module by specifier (a file URL here) using a native dynamic import. */
type NativeModuleImporter = (specifier: string) => Promise<any>
/** Maps a module specifier to a path on disk. */
type ModuleResolver = (specifier: string) => string
/**
 * Shape handed back by `loadLegacyPdfJs`: the two members the previous return type
 * declared, plus whatever else the imported module exports.
 */
type LegacyPdfJsModule = { getDocument: unknown; version?: string; [exportName: string]: unknown }

// Keep import() behind Function so TypeScript does not lower it to require(),
// which cannot load pdfjs-dist's legacy ESM build from CommonJS output.
const nativeImport: NativeModuleImporter = new Function('specifier', 'return import(specifier)') as NativeModuleImporter

/**
 * Load the `pdfjs-dist` legacy ESM build (`pdfjs-dist/legacy/build/pdf.mjs`).
 *
 * The specifier is first resolved to a path, converted to a `file://` URL and then
 * handed to the importer. The complete module object is returned (not just
 * `getDocument` / `version`), so the result matches what a plain dynamic `import()`
 * of the module would yield and consumers can reach any other export.
 *
 * @param importer - Performs the actual import; defaults to a native `import()`. Injectable for tests.
 * @param resolver - Resolves the specifier to a path; defaults to `require.resolve`. Injectable for tests.
 * @returns The module object produced by `importer`.
 */
export const loadLegacyPdfJs = async (
    importer: NativeModuleImporter = nativeImport,
    resolver: ModuleResolver = (specifier) => require.resolve(specifier)
): Promise<LegacyPdfJsModule> => {
    const modulePath = resolver('pdfjs-dist/legacy/build/pdf.mjs')
    // The importer is given a file:// URL derived from the resolved path, not the raw path.
    return await importer(pathToFileURL(modulePath).href)
}
Comment on lines +42 to +50
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The loadLegacyPdfJs function currently returns a subset of the pdfjs-dist module (getDocument and version). This might break compatibility if the consumer (e.g., PDFLoader or a user-defined script) expects other parts of the module, such as GlobalWorkerOptions or OPS. It is safer and more flexible to return the entire module object, matching the behavior of a standard dynamic import(). Per repository guidelines, using a default implementation is preferred unless specific behavior is required, and signature changes are acceptable for internal functions if callers are unaffected.

export const loadLegacyPdfJs = async (
    importer: NativeModuleImporter = nativeImport,
    resolver: ModuleResolver = (specifier) => require.resolve(specifier)
): Promise<any> => {
    const modulePath = resolver('pdfjs-dist/legacy/build/pdf.mjs')
    return await importer(pathToFileURL(modulePath).href)
}
References
  1. Use a default (fallback) implementation unless the specific implementation has meaningfully different behavior or provides better error messages.
  2. A signature change that would be a breaking change for a public API is acceptable for an internal function if all its callers are known and unaffected by the change.


let secretsManagerClient: SecretsManagerClient | null = null
const USE_AWS_SECRETS_MANAGER = process.env.SECRETKEY_STORAGE_TYPE === 'aws'
if (USE_AWS_SECRETS_MANAGER) {
Expand Down
92 changes: 61 additions & 31 deletions packages/ui/src/ui-component/extended/FileUpload.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ const message = `The full contents of uploaded files will be converted to text a
<br />
Refer <a href='https://docs.flowiseai.com/using-flowise/uploads#files' target='_blank' style='color: #2196f3'>docs</a> for more details.`

const availableFileTypes = [
export const availableFileTypes = [
{ name: 'CSS', ext: 'text/css', extension: '.css' },
{ name: 'CSV', ext: 'text/csv', extension: '.csv' },
{ name: 'HTML', ext: 'text/html', extension: '.html' },
Expand All @@ -51,6 +51,46 @@ const availableFileTypes = [
{ name: 'PPTX', ext: 'application/vnd.openxmlformats-officedocument.presentationml.presentation', extension: '.pptx' }
]

/**
 * Assemble the `fullFileUpload` object from the individual form values.
 *
 * @param settings - Object carrying `fullFileUpload`, `allowedFileTypes` (array),
 *                   `pdfUsage` and `pdfLegacyBuild`.
 * @returns `{ status, allowedUploadFileTypes, pdfFile: { usage, legacyBuild } }`, where
 *          `allowedUploadFileTypes` is the array joined with commas.
 */
export const buildFullFileUploadConfig = (settings) => {
    const { fullFileUpload, allowedFileTypes, pdfUsage, pdfLegacyBuild } = settings

    // The allowed types are stored as a single comma-separated string.
    const allowedUploadFileTypes = allowedFileTypes.join(',')
    const pdfFile = { usage: pdfUsage, legacyBuild: pdfLegacyBuild }

    return {
        status: fullFileUpload,
        allowedUploadFileTypes,
        pdfFile
    }
}

/**
 * Derive the initial file-upload form state from the dialog props.
 *
 * Reads `dialogProps.chatflow.chatbotConfig` (a JSON string) and pulls the
 * `fullFileUpload` settings out of it. When the string is absent, or parsing /
 * reading it throws, the defaults are returned instead: upload disabled, every
 * entry of `availableFileTypes` allowed, `pdfUsage` of 'perPage', legacy build off.
 *
 * @param dialogProps - Dialog props; may be nullish or lack `chatflow` / `chatbotConfig`.
 * @returns `{ fullFileUpload, allowedFileTypes, chatbotConfig, pdfUsage, pdfLegacyBuild }`.
 */
export const getInitialFileUploadState = (dialogProps) => {
    // Fallback values, also used field-by-field when a saved setting is missing.
    const defaults = {
        fullFileUpload: false,
        allowedFileTypes: availableFileTypes.map(({ ext }) => ext),
        chatbotConfig: {},
        pdfUsage: 'perPage',
        pdfLegacyBuild: false
    }

    const rawConfig = dialogProps?.chatflow?.chatbotConfig
    if (!rawConfig) return defaults

    try {
        const parsed = JSON.parse(rawConfig)
        const uploadConfig = parsed?.fullFileUpload
        const savedTypes = uploadConfig?.allowedUploadFileTypes
        const pdfConfig = uploadConfig?.pdfFile

        return {
            fullFileUpload: Boolean(uploadConfig?.status),
            // Saved types are a comma-separated string; an empty/missing value means "use defaults".
            allowedFileTypes: savedTypes ? savedTypes.split(',') : defaults.allowedFileTypes,
            chatbotConfig: parsed || {},
            pdfUsage: pdfConfig?.usage || defaults.pdfUsage,
            // `??` keeps an explicitly saved `false` instead of replacing it with the default.
            pdfLegacyBuild: pdfConfig?.legacyBuild ?? defaults.pdfLegacyBuild
        }
    } catch (e) {
        // Malformed JSON (or an unexpected value shape) falls back to the defaults.
        return defaults
    }
}

const FileUpload = ({ dialogProps }) => {
const dispatch = useDispatch()
const customization = useSelector((state) => state.customization)
Expand All @@ -64,6 +104,7 @@ const FileUpload = ({ dialogProps }) => {
const [allowedFileTypes, setAllowedFileTypes] = useState([])
const [chatbotConfig, setChatbotConfig] = useState({})
const [pdfUsage, setPdfUsage] = useState('perPage')
const [pdfLegacyBuild, setPdfLegacyBuild] = useState(false)
const handleChange = (value) => {
setFullFileUpload(value)
}
Expand All @@ -81,15 +122,13 @@ const FileUpload = ({ dialogProps }) => {
setPdfUsage(event.target.value)
}

const handlePdfLegacyBuildChange = (value) => {
setPdfLegacyBuild(value)
}

const onSave = async () => {
try {
const value = {
status: fullFileUpload,
allowedUploadFileTypes: allowedFileTypes.join(','),
pdfFile: {
usage: pdfUsage
}
}
const value = buildFullFileUploadConfig({ fullFileUpload, allowedFileTypes, pdfUsage, pdfLegacyBuild })
chatbotConfig.fullFileUpload = value

const saveResp = await chatflowsApi.updateChatflow(dialogProps.chatflow.id, {
Expand Down Expand Up @@ -130,29 +169,12 @@ const FileUpload = ({ dialogProps }) => {
}

useEffect(() => {
/* backward compatibility - by default, allow all */
const allowedFileTypes = availableFileTypes.map((fileType) => fileType.ext)
setAllowedFileTypes(allowedFileTypes)
if (dialogProps.chatflow) {
if (dialogProps.chatflow.chatbotConfig) {
try {
let chatbotConfig = JSON.parse(dialogProps.chatflow.chatbotConfig)
setChatbotConfig(chatbotConfig || {})
if (chatbotConfig.fullFileUpload) {
setFullFileUpload(chatbotConfig.fullFileUpload.status)
}
if (chatbotConfig.fullFileUpload?.allowedUploadFileTypes) {
const allowedFileTypes = chatbotConfig.fullFileUpload.allowedUploadFileTypes.split(',')
setAllowedFileTypes(allowedFileTypes)
}
if (chatbotConfig.fullFileUpload?.pdfFile?.usage) {
setPdfUsage(chatbotConfig.fullFileUpload.pdfFile.usage)
}
} catch (e) {
setChatbotConfig({})
}
}
}
const initialState = getInitialFileUploadState(dialogProps)
setAllowedFileTypes(initialState.allowedFileTypes)
setChatbotConfig(initialState.chatbotConfig)
setFullFileUpload(initialState.fullFileUpload)
setPdfUsage(initialState.pdfUsage)
setPdfLegacyBuild(initialState.pdfLegacyBuild)

return () => {}
}, [dialogProps])
Expand Down Expand Up @@ -268,6 +290,14 @@ const FileUpload = ({ dialogProps }) => {
/>
</RadioGroup>
</FormControl>
<Box sx={{ mt: 0.5 }}>
<SwitchInput
label='Use Legacy Build'
onChange={handlePdfLegacyBuildChange}
value={pdfLegacyBuild}
disabled={!fullFileUpload}
/>
</Box>
</Box>
)}
</AccordionDetails>
Expand Down