Fix duplicate gateway upload filenames (#2789)

This commit is contained in:
ChenglongZ 2026-05-09 18:02:40 +08:00 committed by GitHub
parent 1edc9d9fae
commit 7a3c58a733
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 44 additions and 2 deletions

View File

@ -520,6 +520,7 @@ Multi-file upload with automatic document conversion:
- Rejects directory inputs before copying so uploads stay all-or-nothing
- Reuses one conversion worker per request when called from an active event loop
- Files stored in thread-isolated directories
- Duplicate filenames in a single upload request are auto-renamed with `_N` suffixes so later files do not truncate earlier files
- Agent receives uploaded file list via `UploadsMiddleware`
See [docs/FILE_UPLOAD.md](docs/FILE_UPLOAD.md) for details.

View File

@ -124,7 +124,7 @@ FastAPI application providing REST endpoints for frontend integration:
| `POST /api/memory/reload` | Force memory reload |
| `GET /api/memory/config` | Memory configuration |
| `GET /api/memory/status` | Combined config + data |
| `POST /api/threads/{id}/uploads` | Upload files (auto-converts PDF/PPT/Excel/Word to Markdown, rejects directory paths) |
| `POST /api/threads/{id}/uploads` | Upload files (auto-converts PDF/PPT/Excel/Word to Markdown, rejects directory paths, auto-renames duplicate filenames in one request) |
| `GET /api/threads/{id}/uploads/list` | List uploaded files |
| `DELETE /api/threads/{id}` | Delete DeerFlow-managed local thread data after LangGraph thread deletion; unexpected failures are logged server-side and return a generic 500 detail |
| `GET /api/threads/{id}/artifacts/{path}` | Serve generated artifacts |

View File

@ -16,6 +16,7 @@ from deerflow.sandbox.sandbox_provider import SandboxProvider, get_sandbox_provi
from deerflow.uploads.manager import (
PathTraversalError,
UnsafeUploadPathError,
claim_unique_filename,
delete_file_safe,
enrich_file_listing,
ensure_uploads_dir,
@ -192,6 +193,10 @@ async def upload_files(
sandbox_sync_targets = []
skipped_files = []
total_size = 0
# Track filenames within this request so duplicate form parts do not
# silently truncate each other. Existing uploads keep the historical
# overwrite behavior for a single replacement upload.
seen_filenames: set[str] = set()
sandbox_provider = get_sandbox_provider()
sync_to_sandbox = not _uses_thread_data_mounts(sandbox_provider)
@ -208,7 +213,8 @@ async def upload_files(
continue
try:
safe_filename = normalize_filename(file.filename)
original_filename = normalize_filename(file.filename)
safe_filename = claim_unique_filename(original_filename, seen_filenames)
except ValueError:
logger.warning(f"Skipping file with unsafe filename: {file.filename!r}")
continue
@ -236,6 +242,8 @@ async def upload_files(
"virtual_path": virtual_path,
"artifact_url": upload_artifact_url(thread_id, safe_filename),
}
if safe_filename != original_filename:
file_info["original_filename"] = original_filename
logger.info(f"Saved file: {safe_filename} ({file_size} bytes) to {file_info['path']}")

View File

@ -61,6 +61,39 @@ def test_upload_files_writes_thread_storage_and_skips_local_sandbox_sync(tmp_pat
sandbox.update_file.assert_not_called()
def test_upload_files_auto_renames_duplicate_form_filenames(tmp_path):
    """Two form parts sharing one filename must not clobber each other.

    The first part keeps its name; the second is saved under a ``_1``
    suffix and records the name it originally arrived with.
    """
    uploads_root = tmp_path / "uploads"
    uploads_root.mkdir(parents=True)

    # Provider that mounts thread data directly, so no sandbox sync happens.
    mounted_provider = MagicMock()
    mounted_provider.uses_thread_data_mounts = True

    duplicate_parts = [
        UploadFile(filename="data.txt", file=BytesIO(b"first")),
        UploadFile(filename="data.txt", file=BytesIO(b"second")),
    ]

    with (
        patch.object(uploads, "get_uploads_dir", return_value=uploads_root),
        patch.object(uploads, "ensure_uploads_dir", return_value=uploads_root),
        patch.object(uploads, "get_sandbox_provider", return_value=mounted_provider),
    ):
        response = asyncio.run(
            call_unwrapped(
                uploads.upload_files,
                "thread-local",
                request=MagicMock(),
                files=duplicate_parts,
                config=SimpleNamespace(),
            )
        )

    assert response.success is True
    saved_names = [entry["filename"] for entry in response.files]
    assert saved_names == ["data.txt", "data_1.txt"]
    # Only the renamed entry advertises the name it came in with.
    assert "original_filename" not in response.files[0]
    assert response.files[1]["original_filename"] == "data.txt"
    # Both payloads survive on disk, in arrival order.
    assert (uploads_root / "data.txt").read_bytes() == b"first"
    assert (uploads_root / "data_1.txt").read_bytes() == b"second"
def test_upload_files_skips_acquire_when_thread_data_is_mounted(tmp_path):
thread_uploads_dir = tmp_path / "uploads"
thread_uploads_dir.mkdir(parents=True)