Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions docs/developers/tools/file-system.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ Qwen Code provides a comprehensive suite of tools for interacting with the local

## 2. `read_file` (ReadFile)

`read_file` reads and returns the content of a specified file. This tool handles text, images (PNG, JPG, GIF, WEBP, SVG, BMP), and PDF files. For text files, it can read specific line ranges. Other binary file types are generally skipped.
`read_file` reads and returns the content of a specified file. This tool handles text and images (PNG, JPG, GIF, WEBP, SVG, BMP). For text files, it can read specific line ranges. PDF files are not supported directly - extract text externally first. Other binary file types are generally skipped.

- **Tool name:** `read_file`
- **Display name:** ReadFile
Expand All @@ -35,11 +35,13 @@ Qwen Code provides a comprehensive suite of tools for interacting with the local
- `limit` (number, optional): For text files, the maximum number of lines to read. If omitted, reads a default maximum (e.g., 2000 lines) or the entire file if feasible.
- **Behavior:**
- For text files: Returns the content. If `offset` and `limit` are used, returns only that slice of lines. Indicates if content was truncated due to line limits or line length limits.
- For image and PDF files: Returns the file content as a base64-encoded data structure suitable for model consumption.
- For image files: Returns the file content as a base64-encoded `inlineData` object suitable for model consumption.
- For PDF files: Returns an error message directing users to extract text externally.
- For other binary files: Attempts to identify and skip them, returning a message indicating it's a generic binary file.
- **Output:** (`llmContent`):
- For text files: The file content, potentially prefixed with a truncation message (e.g., `[File content truncated: showing lines 1-100 of 500 total lines...]\nActual file content...`).
- For image/PDF files: An object containing `inlineData` with `mimeType` and base64 `data` (e.g., `{ inlineData: { mimeType: 'image/png', data: 'base64encodedstring' } }`).
- For image files: An object containing `inlineData` with `mimeType` and base64 `data` (e.g., `{ inlineData: { mimeType: 'image/png', data: 'base64encodedstring' } }`).
- For PDF files: An error message string explaining that PDFs are not supported.
- For other binary files: A message like `Cannot display content of binary file: /path/to/data.bin`.
- **Confirmation:** No.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -813,7 +813,12 @@ describe('StreamingToolCallParser', () => {

it('should return true when a tool call is inside a string literal', () => {
// Simulate truncation mid-string: {"file_path": "/tmp/test.txt", "content": "some text
parser.addChunk(0, '{"file_path": "/tmp/test.txt"', 'call_1', 'write_file');
parser.addChunk(
0,
'{"file_path": "/tmp/test.txt"',
'call_1',
'write_file',
);
parser.addChunk(0, ', "content": "some text');
const state = parser.getState(0);
expect(state.inString).toBe(true);
Expand Down
67 changes: 54 additions & 13 deletions packages/core/src/utils/fileUtils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ describe('fileUtils', () => {
getTruncateToolOutputThreshold: () => 2500,
getTruncateToolOutputLines: () => 500,
getTargetDir: () => tempRootDir,
getModel: () => 'qwen3.5-plus', // Default model with image+video support
} as unknown as Config;

beforeEach(() => {
Expand Down Expand Up @@ -738,29 +739,69 @@ describe('fileUtils', () => {
expect(result.returnDisplay).toContain('Read image file: image.png');
});

it('should process a PDF file', async () => {
it('should reject image files when model does not support image', async () => {
  // Arrange: write a file to the image path and make MIME detection report PNG.
  actualNodeFs.writeFileSync(testImageFilePath, Buffer.from('fake png data'));
  mockMimeGetType.mockReturnValue('image/png');

  // Same settings as the shared mock config, but reporting a text-only model.
  const textOnlyConfig = {
    ...mockConfig,
    getModel: () => 'deepseek-chat',
  } as unknown as Config;

  // Act
  const { llmContent, returnDisplay, error } = await processSingleFileContent(
    testImageFilePath,
    textOnlyConfig,
  );

  // Assert: the result is a plain-text refusal rather than inline image data,
  // and the same reason is surfaced in both the LLM content and the error.
  const expectedReason = 'does not support image input';
  expect(typeof llmContent).toBe('string');
  expect(llmContent).toContain(expectedReason);
  expect(returnDisplay).toContain('Skipped image file');
  expect(error).toContain(expectedReason);
});

it('should reject PDF files when model does not support PDF', async () => {
const fakePdfData = Buffer.from('fake pdf data');
actualNodeFs.writeFileSync(testPdfFilePath, fakePdfData);
mockMimeGetType.mockReturnValue('application/pdf');

// Use a model that doesn't support PDF (e.g., qwen text-only model)
const mockConfigNoPdf = {
...mockConfig,
getModel: () => 'qwen3-coder-plus',
} as unknown as Config;

const result = await processSingleFileContent(
testPdfFilePath,
mockConfig,
mockConfigNoPdf,
);
expect(
(result.llmContent as { inlineData: unknown }).inlineData,
).toBeDefined();
expect(typeof result.llmContent).toBe('string');
expect(result.llmContent).toContain('does not support pdf input');
expect(result.returnDisplay).toContain('Skipped pdf file');
expect(result.error).toContain('does not support pdf input');
});

it('should accept PDF files when model supports PDF', async () => {
const fakePdfData = Buffer.from('fake pdf data');
actualNodeFs.writeFileSync(testPdfFilePath, fakePdfData);
mockMimeGetType.mockReturnValue('application/pdf');

// Use a model that supports PDF (e.g., Claude)
const mockConfigWithPdf = {
...mockConfig,
getModel: () => 'claude-3-sonnet',
} as unknown as Config;

const result = await processSingleFileContent(
testPdfFilePath,
mockConfigWithPdf,
);
expect(result.llmContent).toHaveProperty('inlineData');
expect(
(result.llmContent as { inlineData: { mimeType: string } }).inlineData
.mimeType,
).toBe('application/pdf');
expect(
(result.llmContent as { inlineData: { data: string } }).inlineData.data,
).toBe(fakePdfData.toString('base64'));
expect(
(result.llmContent as { inlineData: { displayName?: string } })
.inlineData.displayName,
).toBe('document.pdf');
expect(result.returnDisplay).toContain('Read pdf file: document.pdf');
expect(result.returnDisplay).toContain('Read pdf file');
});

it('should read an SVG file as text when under 1MB', async () => {
Expand Down
64 changes: 64 additions & 0 deletions packages/core/src/utils/fileUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ import { ToolErrorType } from '../tools/tool-error.js';
import { BINARY_EXTENSIONS } from './ignorePatterns.js';
import type { Config } from '../config/config.js';
import { createDebugLogger } from './debugLogger.js';
import { defaultModalities } from '../core/modalityDefaults.js';
import type { InputModalities } from '../core/contentGenerator.js';

const debugLogger = createDebugLogger('FILE_UTILS');

Expand Down Expand Up @@ -302,6 +304,49 @@ export interface ProcessedFileReadResult {
linesShown?: [number, number]; // For text files [startLine, endLine] (1-based for display)
}

/**
 * Translates a media file type into the name of the `InputModalities` flag
 * that governs whether a model accepts it.
 *
 * The four media types share their names with the modality keys, so the
 * mapping is one-to-one.
 *
 * @param fileType The media file type to translate.
 * @returns The modality key corresponding to `fileType`.
 * @throws Error if a value outside the declared union is passed at runtime.
 */
function fileTypeToModalityKey(
  fileType: 'image' | 'pdf' | 'audio' | 'video',
): keyof InputModalities {
  const isKnownMediaType =
    fileType === 'image' ||
    fileType === 'pdf' ||
    fileType === 'audio' ||
    fileType === 'video';

  if (isKnownMediaType) {
    return fileType;
  }

  // Unreachable for well-typed callers; guards against values that bypass
  // the type constraint (e.g. via a cast).
  throw new Error(`Unexpected file type: ${fileType}`);
}

/**
 * Decides whether a file of the given type may be passed to a model with the
 * given input modalities.
 *
 * @param fileType The detected file type.
 * @param modalities The model's supported input modalities.
 * @returns `true` for text, binary and SVG files unconditionally; for media
 *   types, `true` only when the matching modality flag is exactly `true`.
 */
function isFileTypeSupported(
  fileType: 'image' | 'pdf' | 'audio' | 'video' | 'text' | 'binary' | 'svg',
  modalities: InputModalities,
): boolean {
  switch (fileType) {
    // Not gated by modalities: text is always readable, binary is rejected
    // separately, and SVG is treated as text.
    case 'text':
    case 'binary':
    case 'svg':
      return true;

    // Remaining media types: look up the corresponding modality flag.
    default:
      return modalities[fileTypeToModalityKey(fileType)] === true;
  }
}

/**
* Reads and processes a single file, handling text, images, and PDFs.
* @param filePath Absolute path to the file.
Expand Down Expand Up @@ -356,6 +401,25 @@ export async function processSingleFileContent(
.replace(/\\/g, '/');

const displayName = path.basename(filePath);

// Get the current model's supported modalities
const model = config.getModel();
const modalities = defaultModalities(model);

// Check if the file type is supported by the current model
if (!isFileTypeSupported(fileType, modalities)) {
// At this point, fileType must be a media type (image, pdf, audio, video)
// because text/binary/svg are always supported
const modalityName = fileTypeToModalityKey(
fileType as 'image' | 'pdf' | 'audio' | 'video',
);
return {
llmContent: `The current model "${model}" does not support ${modalityName} input. ${fileType.toUpperCase()} files cannot be read directly.`,
returnDisplay: `Skipped ${fileType} file: ${relativePathForDisplay} (model doesn't support ${modalityName} input)`,
error: `Model "${model}" does not support ${modalityName} input. Please use a model that supports ${modalityName} or convert the file to text externally.`,
};
}

switch (fileType) {
case 'binary': {
return {
Expand Down