mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-06-09 17:12:01 +00:00
fix upload file size contract (#3408)
This commit is contained in:
parent
dd8f9bf5f0
commit
1aac408dd0
@ -39,15 +39,39 @@ DEFAULT_MAX_FILE_SIZE = 50 * 1024 * 1024
|
||||
DEFAULT_MAX_TOTAL_SIZE = 100 * 1024 * 1024
|
||||
|
||||
|
||||
class UploadedFileInfo(BaseModel):
|
||||
"""Uploaded file metadata exposed by upload and list APIs."""
|
||||
|
||||
filename: str
|
||||
size: int
|
||||
path: str
|
||||
virtual_path: str
|
||||
artifact_url: str
|
||||
extension: str | None = None
|
||||
modified: float | None = None
|
||||
original_filename: str | None = None
|
||||
markdown_file: str | None = None
|
||||
markdown_path: str | None = None
|
||||
markdown_virtual_path: str | None = None
|
||||
markdown_artifact_url: str | None = None
|
||||
|
||||
|
||||
class UploadResponse(BaseModel):
|
||||
"""Response model for file upload."""
|
||||
|
||||
success: bool
|
||||
files: list[dict[str, str]]
|
||||
files: list[UploadedFileInfo]
|
||||
message: str
|
||||
skipped_files: list[str] = Field(default_factory=list)
|
||||
|
||||
|
||||
class UploadListResponse(BaseModel):
|
||||
"""Response model for uploaded file listing."""
|
||||
|
||||
files: list[UploadedFileInfo]
|
||||
count: int
|
||||
|
||||
|
||||
class UploadLimits(BaseModel):
|
||||
"""Application-level upload limits exposed to clients."""
|
||||
|
||||
@ -256,7 +280,7 @@ async def upload_files(
|
||||
|
||||
file_info = {
|
||||
"filename": safe_filename,
|
||||
"size": str(file_size),
|
||||
"size": file_size,
|
||||
"path": str(sandbox_uploads / safe_filename),
|
||||
"virtual_path": virtual_path,
|
||||
"artifact_url": upload_artifact_url(thread_id, safe_filename),
|
||||
@ -333,9 +357,9 @@ async def get_upload_limits(
|
||||
return _get_upload_limits(config)
|
||||
|
||||
|
||||
@router.get("/list", response_model=dict)
|
||||
@router.get("/list", response_model=UploadListResponse)
|
||||
@require_permission("threads", "read", owner_check=True)
|
||||
async def list_uploaded_files(thread_id: str, request: Request) -> dict:
|
||||
async def list_uploaded_files(thread_id: str, request: Request) -> UploadListResponse:
|
||||
"""List all files in a thread's uploads directory."""
|
||||
try:
|
||||
uploads_dir = get_uploads_dir(thread_id)
|
||||
@ -349,7 +373,7 @@ async def list_uploaded_files(thread_id: str, request: Request) -> dict:
|
||||
for f in result["files"]:
|
||||
f["path"] = str(sandbox_uploads / f["filename"])
|
||||
|
||||
return result
|
||||
return UploadListResponse(**result)
|
||||
|
||||
|
||||
@router.delete("/{filename}")
|
||||
|
||||
@ -1209,7 +1209,7 @@ class DeerFlowClient:
|
||||
|
||||
info: dict[str, Any] = {
|
||||
"filename": dest_name,
|
||||
"size": str(dest.stat().st_size),
|
||||
"size": dest.stat().st_size,
|
||||
"path": str(dest),
|
||||
"virtual_path": upload_virtual_path(dest_name),
|
||||
"artifact_url": upload_artifact_url(thread_id, dest_name),
|
||||
|
||||
@ -226,8 +226,7 @@ def list_files_in_dir(directory: Path) -> dict:
|
||||
Returns:
|
||||
Dict with "files" list (sorted by name) and "count".
|
||||
Each file entry has ``size`` as *int* (bytes). Call
|
||||
:func:`enrich_file_listing` to stringify sizes and add
|
||||
virtual / artifact URLs.
|
||||
:func:`enrich_file_listing` to add virtual / artifact URLs.
|
||||
"""
|
||||
if not directory.is_dir():
|
||||
return {"files": [], "count": 0}
|
||||
@ -298,13 +297,12 @@ def upload_virtual_path(filename: str) -> str:
|
||||
|
||||
|
||||
def enrich_file_listing(result: dict, thread_id: str) -> dict:
|
||||
"""Add virtual paths, artifact URLs, and stringify sizes on a listing result.
|
||||
"""Add virtual paths and artifact URLs on a listing result.
|
||||
|
||||
Mutates *result* in place and returns it for convenience.
|
||||
"""
|
||||
for f in result["files"]:
|
||||
filename = f["filename"]
|
||||
f["size"] = str(f["size"])
|
||||
f["virtual_path"] = upload_virtual_path(filename)
|
||||
f["artifact_url"] = upload_artifact_url(thread_id, filename)
|
||||
return result
|
||||
|
||||
@ -1472,6 +1472,7 @@ class TestUploads:
|
||||
assert result["success"] is True
|
||||
assert len(result["files"]) == 1
|
||||
assert result["files"][0]["filename"] == "test.txt"
|
||||
assert result["files"][0]["size"] == len("hello")
|
||||
assert "artifact_url" in result["files"][0]
|
||||
assert "message" in result
|
||||
assert (uploads_dir / "test.txt").exists()
|
||||
@ -1551,6 +1552,8 @@ class TestUploads:
|
||||
assert len(result["files"]) == 2
|
||||
names = {f["filename"] for f in result["files"]}
|
||||
assert names == {"a.txt", "b.txt"}
|
||||
sizes = {f["filename"]: f["size"] for f in result["files"]}
|
||||
assert sizes == {"a.txt": 1, "b.txt": 2}
|
||||
# Verify artifact_url is present
|
||||
for f in result["files"]:
|
||||
assert "artifact_url" in f
|
||||
@ -2458,6 +2461,7 @@ class TestGatewayConformance:
|
||||
parsed = UploadResponse(**result)
|
||||
assert parsed.success is True
|
||||
assert len(parsed.files) == 1
|
||||
assert parsed.files[0].size == len("hello")
|
||||
|
||||
def test_get_memory_config(self, client):
|
||||
mem_cfg = MagicMock()
|
||||
|
||||
@ -56,12 +56,40 @@ def test_upload_files_writes_thread_storage_and_skips_local_sandbox_sync(tmp_pat
|
||||
|
||||
assert result.success is True
|
||||
assert len(result.files) == 1
|
||||
assert result.files[0]["filename"] == "notes.txt"
|
||||
assert result.files[0].filename == "notes.txt"
|
||||
assert result.files[0].size == len(b"hello uploads")
|
||||
assert (thread_uploads_dir / "notes.txt").read_bytes() == b"hello uploads"
|
||||
|
||||
sandbox.update_file.assert_not_called()
|
||||
|
||||
|
||||
def test_upload_and_list_response_models_expose_size_as_int(tmp_path):
|
||||
thread_uploads_dir = tmp_path / "uploads"
|
||||
thread_uploads_dir.mkdir(parents=True)
|
||||
(thread_uploads_dir / "notes.txt").write_bytes(b"hello uploads")
|
||||
|
||||
paths = MagicMock()
|
||||
paths.sandbox_uploads_dir.return_value = thread_uploads_dir
|
||||
|
||||
with (
|
||||
patch.object(uploads, "get_uploads_dir", return_value=thread_uploads_dir),
|
||||
patch.object(uploads, "get_paths", return_value=paths),
|
||||
):
|
||||
result = asyncio.run(call_unwrapped(uploads.list_uploaded_files, "thread-local", request=MagicMock()))
|
||||
|
||||
assert result.count == 1
|
||||
assert result.files[0].filename == "notes.txt"
|
||||
assert result.files[0].size == len(b"hello uploads")
|
||||
|
||||
|
||||
def test_upload_openapi_schema_exposes_file_size_as_integer():
|
||||
upload_schema = uploads.UploadResponse.model_json_schema()
|
||||
list_schema = uploads.UploadListResponse.model_json_schema()
|
||||
|
||||
assert upload_schema["$defs"]["UploadedFileInfo"]["properties"]["size"]["type"] == "integer"
|
||||
assert list_schema["$defs"]["UploadedFileInfo"]["properties"]["size"]["type"] == "integer"
|
||||
|
||||
|
||||
def test_upload_files_auto_renames_duplicate_form_filenames(tmp_path):
|
||||
thread_uploads_dir = tmp_path / "uploads"
|
||||
thread_uploads_dir.mkdir(parents=True)
|
||||
@ -88,9 +116,9 @@ def test_upload_files_auto_renames_duplicate_form_filenames(tmp_path):
|
||||
)
|
||||
|
||||
assert result.success is True
|
||||
assert [file_info["filename"] for file_info in result.files] == ["data.txt", "data_1.txt"]
|
||||
assert "original_filename" not in result.files[0]
|
||||
assert result.files[1]["original_filename"] == "data.txt"
|
||||
assert [file_info.filename for file_info in result.files] == ["data.txt", "data_1.txt"]
|
||||
assert result.files[0].original_filename is None
|
||||
assert result.files[1].original_filename == "data.txt"
|
||||
assert (thread_uploads_dir / "data.txt").read_bytes() == b"first"
|
||||
assert (thread_uploads_dir / "data_1.txt").read_bytes() == b"second"
|
||||
|
||||
@ -138,8 +166,8 @@ def test_upload_files_does_not_auto_convert_documents_by_default(tmp_path):
|
||||
|
||||
assert result.success is True
|
||||
assert len(result.files) == 1
|
||||
assert result.files[0]["filename"] == "report.pdf"
|
||||
assert "markdown_file" not in result.files[0]
|
||||
assert result.files[0].filename == "report.pdf"
|
||||
assert result.files[0].markdown_file is None
|
||||
convert_mock.assert_not_called()
|
||||
assert not (thread_uploads_dir / "report.md").exists()
|
||||
|
||||
@ -172,8 +200,8 @@ def test_upload_files_syncs_non_local_sandbox_and_marks_markdown_file(tmp_path):
|
||||
assert result.success is True
|
||||
assert len(result.files) == 1
|
||||
file_info = result.files[0]
|
||||
assert file_info["filename"] == "report.pdf"
|
||||
assert file_info["markdown_file"] == "report.md"
|
||||
assert file_info.filename == "report.pdf"
|
||||
assert file_info.markdown_file == "report.md"
|
||||
|
||||
assert (thread_uploads_dir / "report.pdf").read_bytes() == b"pdf-bytes"
|
||||
assert (thread_uploads_dir / "report.md").read_text(encoding="utf-8") == "converted"
|
||||
@ -516,7 +544,7 @@ def test_upload_files_rejects_dotdot_and_dot_filenames(tmp_path):
|
||||
result = asyncio.run(call_unwrapped(uploads.upload_files, "thread-local", request=MagicMock(), files=[file], config=SimpleNamespace()))
|
||||
assert result.success is True
|
||||
assert len(result.files) == 1
|
||||
assert result.files[0]["filename"] == "passwd"
|
||||
assert result.files[0].filename == "passwd"
|
||||
|
||||
# Only the safely normalised file should exist
|
||||
assert [f.name for f in thread_uploads_dir.iterdir()] == ["passwd"]
|
||||
@ -616,7 +644,7 @@ def test_upload_files_overwrites_existing_regular_file(tmp_path):
|
||||
result = asyncio.run(uploads.upload_files("thread-local", files=[file]))
|
||||
|
||||
assert result.success is True
|
||||
assert [file_info["filename"] for file_info in result.files] == ["notes.txt"]
|
||||
assert [file_info.filename for file_info in result.files] == ["notes.txt"]
|
||||
assert existing_file.read_bytes() == b"new upload"
|
||||
assert existing_file.stat().st_nlink == 1
|
||||
|
||||
|
||||
@ -23,6 +23,7 @@ export interface UploadResponse {
|
||||
success: boolean;
|
||||
files: UploadedFileInfo[];
|
||||
message: string;
|
||||
skipped_files: string[];
|
||||
}
|
||||
|
||||
export interface ListFilesResponse {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user