fix upload file size contract (#3408)

This commit is contained in:
Nan Gao 2026-06-06 09:12:17 +02:00 committed by GitHub
parent dd8f9bf5f0
commit 1aac408dd0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 75 additions and 20 deletions

View File

@ -39,15 +39,39 @@ DEFAULT_MAX_FILE_SIZE = 50 * 1024 * 1024
DEFAULT_MAX_TOTAL_SIZE = 100 * 1024 * 1024
class UploadedFileInfo(BaseModel):
    """Uploaded file metadata exposed by upload and list APIs.

    The same model describes the entries of ``UploadResponse.files`` and
    ``UploadListResponse.files``. Every optional field defaults to ``None``
    when the producing endpoint does not supply it.
    """

    # Name of the stored file.
    filename: str
    # Size in bytes, as an integer (not a stringified number).
    size: int
    # Filesystem path of the file, rendered as a string.
    path: str
    # Value produced by upload_virtual_path(filename).
    virtual_path: str
    # Value produced by upload_artifact_url(thread_id, filename).
    artifact_url: str
    # NOTE(review): presumably the file suffix and modification timestamp
    # reported by the directory listing — confirm against list_files_in_dir.
    extension: str | None = None
    modified: float | None = None
    # Name the client originally submitted when the stored file was renamed
    # to avoid a duplicate (e.g. "data.txt" stored as "data_1.txt").
    original_filename: str | None = None
    # Name of the converted Markdown companion (e.g. "report.md" for
    # "report.pdf"); stays None when no conversion was performed.
    markdown_file: str | None = None
    # NOTE(review): presumably the Markdown counterparts of path,
    # virtual_path and artifact_url — confirm against the upload handler.
    markdown_path: str | None = None
    markdown_virtual_path: str | None = None
    markdown_artifact_url: str | None = None
class UploadResponse(BaseModel):
"""Response model for file upload."""
success: bool
files: list[dict[str, str]]
files: list[UploadedFileInfo]
message: str
skipped_files: list[str] = Field(default_factory=list)
class UploadListResponse(BaseModel):
    """Response model for uploaded file listing.

    Used as the ``response_model`` of the ``/list`` route.
    """

    # One entry per listed file.
    files: list[UploadedFileInfo]
    # Entry count reported alongside ``files`` by the listing helper.
    count: int
class UploadLimits(BaseModel):
"""Application-level upload limits exposed to clients."""
@ -256,7 +280,7 @@ async def upload_files(
file_info = {
"filename": safe_filename,
"size": str(file_size),
"size": file_size,
"path": str(sandbox_uploads / safe_filename),
"virtual_path": virtual_path,
"artifact_url": upload_artifact_url(thread_id, safe_filename),
@ -333,9 +357,9 @@ async def get_upload_limits(
return _get_upload_limits(config)
@router.get("/list", response_model=dict)
@router.get("/list", response_model=UploadListResponse)
@require_permission("threads", "read", owner_check=True)
async def list_uploaded_files(thread_id: str, request: Request) -> dict:
async def list_uploaded_files(thread_id: str, request: Request) -> UploadListResponse:
"""List all files in a thread's uploads directory."""
try:
uploads_dir = get_uploads_dir(thread_id)
@ -349,7 +373,7 @@ async def list_uploaded_files(thread_id: str, request: Request) -> dict:
for f in result["files"]:
f["path"] = str(sandbox_uploads / f["filename"])
return result
return UploadListResponse(**result)
@router.delete("/{filename}")

View File

@ -1209,7 +1209,7 @@ class DeerFlowClient:
info: dict[str, Any] = {
"filename": dest_name,
"size": str(dest.stat().st_size),
"size": dest.stat().st_size,
"path": str(dest),
"virtual_path": upload_virtual_path(dest_name),
"artifact_url": upload_artifact_url(thread_id, dest_name),

View File

@ -226,8 +226,7 @@ def list_files_in_dir(directory: Path) -> dict:
Returns:
Dict with "files" list (sorted by name) and "count".
Each file entry has ``size`` as *int* (bytes). Call
:func:`enrich_file_listing` to stringify sizes and add
virtual / artifact URLs.
:func:`enrich_file_listing` to add virtual / artifact URLs.
"""
if not directory.is_dir():
return {"files": [], "count": 0}
@ -298,13 +297,12 @@ def upload_virtual_path(filename: str) -> str:
def enrich_file_listing(result: dict, thread_id: str) -> dict:
"""Add virtual paths, artifact URLs, and stringify sizes on a listing result.
"""Add virtual paths and artifact URLs on a listing result.
Mutates *result* in place and returns it for convenience.
"""
for f in result["files"]:
filename = f["filename"]
f["size"] = str(f["size"])
f["virtual_path"] = upload_virtual_path(filename)
f["artifact_url"] = upload_artifact_url(thread_id, filename)
return result

View File

@ -1472,6 +1472,7 @@ class TestUploads:
assert result["success"] is True
assert len(result["files"]) == 1
assert result["files"][0]["filename"] == "test.txt"
assert result["files"][0]["size"] == len("hello")
assert "artifact_url" in result["files"][0]
assert "message" in result
assert (uploads_dir / "test.txt").exists()
@ -1551,6 +1552,8 @@ class TestUploads:
assert len(result["files"]) == 2
names = {f["filename"] for f in result["files"]}
assert names == {"a.txt", "b.txt"}
sizes = {f["filename"]: f["size"] for f in result["files"]}
assert sizes == {"a.txt": 1, "b.txt": 2}
# Verify artifact_url is present
for f in result["files"]:
assert "artifact_url" in f
@ -2458,6 +2461,7 @@ class TestGatewayConformance:
parsed = UploadResponse(**result)
assert parsed.success is True
assert len(parsed.files) == 1
assert parsed.files[0].size == len("hello")
def test_get_memory_config(self, client):
mem_cfg = MagicMock()

View File

@ -56,12 +56,40 @@ def test_upload_files_writes_thread_storage_and_skips_local_sandbox_sync(tmp_pat
assert result.success is True
assert len(result.files) == 1
assert result.files[0]["filename"] == "notes.txt"
assert result.files[0].filename == "notes.txt"
assert result.files[0].size == len(b"hello uploads")
assert (thread_uploads_dir / "notes.txt").read_bytes() == b"hello uploads"
sandbox.update_file.assert_not_called()
def test_upload_and_list_response_models_expose_size_as_int(tmp_path):
    """Listing a thread's uploads reports the file size as an integer byte count."""
    payload = b"hello uploads"
    uploads_root = tmp_path / "uploads"
    uploads_root.mkdir(parents=True)
    (uploads_root / "notes.txt").write_bytes(payload)

    # Point both the storage lookup and the sandbox path lookup at tmp_path.
    fake_paths = MagicMock()
    fake_paths.sandbox_uploads_dir.return_value = uploads_root

    with (
        patch.object(uploads, "get_uploads_dir", return_value=uploads_root),
        patch.object(uploads, "get_paths", return_value=fake_paths),
    ):
        listing = asyncio.run(call_unwrapped(uploads.list_uploaded_files, "thread-local", request=MagicMock()))

    assert listing.count == 1
    listed_file = listing.files[0]
    assert listed_file.filename == "notes.txt"
    assert listed_file.size == len(payload)
def test_upload_openapi_schema_exposes_file_size_as_integer():
    """Both upload response models must declare ``size`` as a JSON-schema integer."""
    # Generate both schemas up front, then check each one in turn.
    schemas = [
        response_model.model_json_schema()
        for response_model in (uploads.UploadResponse, uploads.UploadListResponse)
    ]

    for schema in schemas:
        size_property = schema["$defs"]["UploadedFileInfo"]["properties"]["size"]
        assert size_property["type"] == "integer"
def test_upload_files_auto_renames_duplicate_form_filenames(tmp_path):
thread_uploads_dir = tmp_path / "uploads"
thread_uploads_dir.mkdir(parents=True)
@ -88,9 +116,9 @@ def test_upload_files_auto_renames_duplicate_form_filenames(tmp_path):
)
assert result.success is True
assert [file_info["filename"] for file_info in result.files] == ["data.txt", "data_1.txt"]
assert "original_filename" not in result.files[0]
assert result.files[1]["original_filename"] == "data.txt"
assert [file_info.filename for file_info in result.files] == ["data.txt", "data_1.txt"]
assert result.files[0].original_filename is None
assert result.files[1].original_filename == "data.txt"
assert (thread_uploads_dir / "data.txt").read_bytes() == b"first"
assert (thread_uploads_dir / "data_1.txt").read_bytes() == b"second"
@ -138,8 +166,8 @@ def test_upload_files_does_not_auto_convert_documents_by_default(tmp_path):
assert result.success is True
assert len(result.files) == 1
assert result.files[0]["filename"] == "report.pdf"
assert "markdown_file" not in result.files[0]
assert result.files[0].filename == "report.pdf"
assert result.files[0].markdown_file is None
convert_mock.assert_not_called()
assert not (thread_uploads_dir / "report.md").exists()
@ -172,8 +200,8 @@ def test_upload_files_syncs_non_local_sandbox_and_marks_markdown_file(tmp_path):
assert result.success is True
assert len(result.files) == 1
file_info = result.files[0]
assert file_info["filename"] == "report.pdf"
assert file_info["markdown_file"] == "report.md"
assert file_info.filename == "report.pdf"
assert file_info.markdown_file == "report.md"
assert (thread_uploads_dir / "report.pdf").read_bytes() == b"pdf-bytes"
assert (thread_uploads_dir / "report.md").read_text(encoding="utf-8") == "converted"
@ -516,7 +544,7 @@ def test_upload_files_rejects_dotdot_and_dot_filenames(tmp_path):
result = asyncio.run(call_unwrapped(uploads.upload_files, "thread-local", request=MagicMock(), files=[file], config=SimpleNamespace()))
assert result.success is True
assert len(result.files) == 1
assert result.files[0]["filename"] == "passwd"
assert result.files[0].filename == "passwd"
# Only the safely normalised file should exist
assert [f.name for f in thread_uploads_dir.iterdir()] == ["passwd"]
@ -616,7 +644,7 @@ def test_upload_files_overwrites_existing_regular_file(tmp_path):
result = asyncio.run(uploads.upload_files("thread-local", files=[file]))
assert result.success is True
assert [file_info["filename"] for file_info in result.files] == ["notes.txt"]
assert [file_info.filename for file_info in result.files] == ["notes.txt"]
assert existing_file.read_bytes() == b"new upload"
assert existing_file.stat().st_nlink == 1

View File

@ -23,6 +23,7 @@ export interface UploadResponse {
success: boolean;
files: UploadedFileInfo[];
message: string;
skipped_files: string[];
}
export interface ListFilesResponse {