Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,11 @@ import {
UnstructuredLoaderOptions,
UnstructuredLoaderStrategy,
SkipInferTableTypes,
HiResModelName,
UnstructuredLoader as LCUnstructuredLoader
HiResModelName
} from '@langchain/community/document_loaders/fs/unstructured'
import { getCredentialData, getCredentialParam, handleEscapeCharacters } from '../../../src/utils'
import { getFileFromStorage, INodeOutputsValue } from '../../../src'
import { UnstructuredLoader } from './Unstructured'
import { isPathTraversal, isUnsafeFilePath } from '../../../src/validator'
import sanitize from 'sanitize-filename'
import path from 'path'

class UnstructuredFile_DocumentLoaders implements INode {
label: string
Expand Down Expand Up @@ -44,17 +40,6 @@ class UnstructuredFile_DocumentLoaders implements INode {
optional: true
}
this.inputs = [
/** Deprecated
{
label: 'File Path',
name: 'filePath',
type: 'string',
placeholder: '',
optional: true,
warning:
'Use the File Upload instead of File path. If file is uploaded, this path is ignored. Path will be deprecated in future releases.'
},
*/
{
label: 'Files Upload',
name: 'fileObject',
Expand Down Expand Up @@ -455,7 +440,6 @@ class UnstructuredFile_DocumentLoaders implements INode {
}

async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
const filePath = nodeData.inputs?.filePath as string
const unstructuredAPIUrl = nodeData.inputs?.unstructuredAPIUrl as string
const strategy = nodeData.inputs?.strategy as UnstructuredLoaderStrategy
const encoding = nodeData.inputs?.encoding as string
Expand Down Expand Up @@ -560,37 +544,8 @@ class UnstructuredFile_DocumentLoaders implements INode {
docs.push(...loaderDocs)
}
}
} else if (filePath) {
if (!filePath || typeof filePath !== 'string') {
throw new Error('Invalid file path format')
}

if (isPathTraversal(filePath) || isUnsafeFilePath(filePath)) {
throw new Error('Invalid path characters detected in filePath - path traversal not allowed')
}

const parsedPath = path.parse(filePath)
const sanitizedFilename = sanitize(parsedPath.base)

if (!sanitizedFilename || sanitizedFilename.trim() === '') {
throw new Error('Invalid filename after sanitization')
}

const sanitizedFilePath = path.join(parsedPath.dir, sanitizedFilename)

if (!path.isAbsolute(sanitizedFilePath)) {
throw new Error('File path must be absolute')
}

if (sanitizedFilePath.includes('..')) {
throw new Error('Invalid file path - directory traversal not allowed')
}

const loader = new LCUnstructuredLoader(sanitizedFilePath, obj)
const loaderDocs = await loader.load()
docs.push(...loaderDocs)
} else {
throw new Error('File path or File upload is required')
throw new Error('File upload is required')
}

if (metadata) {
Expand Down