diff --git a/backend/CLAUDE.md b/backend/CLAUDE.md index 080c61cad..d03aeefd8 100644 --- a/backend/CLAUDE.md +++ b/backend/CLAUDE.md @@ -520,6 +520,7 @@ Multi-file upload with automatic document conversion: - Rejects directory inputs before copying so uploads stay all-or-nothing - Reuses one conversion worker per request when called from an active event loop - Files stored in thread-isolated directories +- Duplicate filenames in a single upload request are auto-renamed with `_N` suffixes so later files do not truncate earlier files - Agent receives uploaded file list via `UploadsMiddleware` See [docs/FILE_UPLOAD.md](docs/FILE_UPLOAD.md) for details. diff --git a/backend/README.md b/backend/README.md index 158540946..0e2d966ee 100644 --- a/backend/README.md +++ b/backend/README.md @@ -124,7 +124,7 @@ FastAPI application providing REST endpoints for frontend integration: | `POST /api/memory/reload` | Force memory reload | | `GET /api/memory/config` | Memory configuration | | `GET /api/memory/status` | Combined config + data | -| `POST /api/threads/{id}/uploads` | Upload files (auto-converts PDF/PPT/Excel/Word to Markdown, rejects directory paths) | +| `POST /api/threads/{id}/uploads` | Upload files (auto-converts PDF/PPT/Excel/Word to Markdown, rejects directory paths, auto-renames duplicate filenames in one request) | | `GET /api/threads/{id}/uploads/list` | List uploaded files | | `DELETE /api/threads/{id}` | Delete DeerFlow-managed local thread data after LangGraph thread deletion; unexpected failures are logged server-side and return a generic 500 detail | | `GET /api/threads/{id}/artifacts/{path}` | Serve generated artifacts | diff --git a/backend/app/gateway/routers/uploads.py b/backend/app/gateway/routers/uploads.py index a4267f728..386618725 100644 --- a/backend/app/gateway/routers/uploads.py +++ b/backend/app/gateway/routers/uploads.py @@ -16,6 +16,7 @@ from deerflow.sandbox.sandbox_provider import SandboxProvider, get_sandbox_provi from deerflow.uploads.manager import ( PathTraversalError, UnsafeUploadPathError, + claim_unique_filename, delete_file_safe, enrich_file_listing, ensure_uploads_dir, @@ -192,6 +193,10 @@ async def upload_files( sandbox_sync_targets = [] skipped_files = [] total_size = 0 + # Track filenames within this request so duplicate form parts do not + # silently truncate each other. Existing uploads keep the historical + # overwrite behavior for a single replacement upload. + seen_filenames: set[str] = set() sandbox_provider = get_sandbox_provider() sync_to_sandbox = not _uses_thread_data_mounts(sandbox_provider) @@ -208,7 +213,8 @@ async def upload_files( continue try: - safe_filename = normalize_filename(file.filename) + original_filename = normalize_filename(file.filename) + safe_filename = claim_unique_filename(original_filename, seen_filenames) except ValueError: logger.warning(f"Skipping file with unsafe filename: {file.filename!r}") continue @@ -236,6 +242,8 @@ async def upload_files( "virtual_path": virtual_path, "artifact_url": upload_artifact_url(thread_id, safe_filename), } + if safe_filename != original_filename: + file_info["original_filename"] = original_filename logger.info(f"Saved file: {safe_filename} ({file_size} bytes) to {file_info['path']}") diff --git a/backend/tests/test_uploads_router.py b/backend/tests/test_uploads_router.py index 4a778345f..7846865b8 100644 --- a/backend/tests/test_uploads_router.py +++ b/backend/tests/test_uploads_router.py @@ -61,6 +61,39 @@ def test_upload_files_writes_thread_storage_and_skips_local_sandbox_sync(tmp_pat sandbox.update_file.assert_not_called() +def test_upload_files_auto_renames_duplicate_form_filenames(tmp_path): + thread_uploads_dir = tmp_path / "uploads" + thread_uploads_dir.mkdir(parents=True) + + provider = MagicMock() + provider.uses_thread_data_mounts = True + + with ( + patch.object(uploads, "get_uploads_dir", return_value=thread_uploads_dir), + patch.object(uploads, "ensure_uploads_dir", return_value=thread_uploads_dir), + patch.object(uploads, "get_sandbox_provider", return_value=provider), + ): + result = asyncio.run( + call_unwrapped( + uploads.upload_files, + "thread-local", + request=MagicMock(), + files=[ + UploadFile(filename="data.txt", file=BytesIO(b"first")), + UploadFile(filename="data.txt", file=BytesIO(b"second")), + ], + config=SimpleNamespace(), + ) + ) + + assert result.success is True + assert [file_info["filename"] for file_info in result.files] == ["data.txt", "data_1.txt"] + assert "original_filename" not in result.files[0] + assert result.files[1]["original_filename"] == "data.txt" + assert (thread_uploads_dir / "data.txt").read_bytes() == b"first" + assert (thread_uploads_dir / "data_1.txt").read_bytes() == b"second" + + def test_upload_files_skips_acquire_when_thread_data_is_mounted(tmp_path): thread_uploads_dir = tmp_path / "uploads" thread_uploads_dir.mkdir(parents=True)