diff --git a/backend/packages/harness/deerflow/agents/middlewares/uploads_middleware.py b/backend/packages/harness/deerflow/agents/middlewares/uploads_middleware.py
index a0c48969c..78c9a7b7b 100644
--- a/backend/packages/harness/deerflow/agents/middlewares/uploads_middleware.py
+++ b/backend/packages/harness/deerflow/agents/middlewares/uploads_middleware.py
@@ -15,20 +15,45 @@ from deerflow.utils.file_conversion import extract_outline
 logger = logging.getLogger(__name__)
 
 
-def _extract_outline_for_file(file_path: Path) -> list[dict]:
-    """Return the document outline for *file_path* if a converted .md exists.
+_OUTLINE_PREVIEW_LINES = 5  # max non-empty lines shown in the fallback preview
+_OUTLINE_PREVIEW_MAX_CHARS = 200  # per-line cap so one long line cannot bloat the prompt
+
+
+def _extract_outline_for_file(file_path: Path) -> tuple[list[dict], list[str]]:
+    """Return the document outline and fallback preview for *file_path*.
 
     Looks for a sibling ``<stem>.md`` file produced by the upload conversion
-    pipeline.  Returns an empty list when the file is not a converted document
-    or when no headings are found.
+    pipeline.
+
+    Returns:
+        (outline, preview) where:
+        - outline: list of ``{title, line}`` dicts (plus optional sentinel).
+          Empty when no headings are found or no .md exists.
+        - preview: first few non-empty lines of the .md, each capped at
+          ``_OUTLINE_PREVIEW_MAX_CHARS`` characters; a content anchor for the
+          agent when outline is empty.  Empty when outline is non-empty.
     """
     md_path = file_path.with_suffix(".md")
     if not md_path.is_file():
-        return []
+        return [], []
+
     outline = extract_outline(md_path)
     if outline:
         logger.debug("Extracted %d outline entries from %s", len(outline), file_path.name)
-    return outline
+        return outline, []
+
+    # No headings: fall back to the first few non-empty lines, each length-capped.
+    preview: list[str] = []
+    try:
+        with md_path.open(encoding="utf-8") as f:
+            for line in f:
+                if stripped := line.strip():
+                    preview.append(stripped[:_OUTLINE_PREVIEW_MAX_CHARS])
+                if len(preview) >= _OUTLINE_PREVIEW_LINES:
+                    break
+    except (OSError, UnicodeError):
+        logger.debug("Failed to read preview lines from %s", md_path, exc_info=True)
+    return [], preview
 
 
 class UploadsMiddlewareState(AgentState):
@@ -64,13 +89,20 @@ class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]):
         lines.append(f"  Path: {file['path']}")
         outline = file.get("outline") or []
         if outline:
-            truncated = outline[-1].get("truncated", False) if outline else False
+            truncated = outline[-1].get("truncated", False)
             visible = [e for e in outline if not e.get("truncated")]
             lines.append("  Document outline (use `read_file` with line ranges to read sections):")
             for entry in visible:
                 lines.append(f"    L{entry['line']}: {entry['title']}")
             if truncated:
                 lines.append(f"    ... (showing first {len(visible)} headings; use `read_file` to explore further)")
+        else:
+            preview = file.get("outline_preview") or []
+            if preview:
+                lines.append("  No structural headings detected. Document begins with:")
+                for text in preview:
+                    lines.append(f"    > {text}")
+            lines.append("  Use `grep` to search for keywords (e.g. `grep(pattern='keyword', path='/mnt/user-data/uploads/')`).")
         lines.append("")
 
     def _create_files_message(self, new_files: list[dict], historical_files: list[dict]) -> str:
@@ -201,13 +233,15 @@ class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]):
             for file_path in sorted(uploads_dir.iterdir()):
                 if file_path.is_file() and file_path.name not in new_filenames:
                     stat = file_path.stat()
+                    outline, preview = _extract_outline_for_file(file_path)
                     historical_files.append(
                         {
                             "filename": file_path.name,
                             "size": stat.st_size,
                             "path": f"/mnt/user-data/uploads/{file_path.name}",
                             "extension": file_path.suffix,
-                            "outline": _extract_outline_for_file(file_path),
+                            "outline": outline,
+                            "outline_preview": preview,
                         }
                     )
 
@@ -215,7 +249,9 @@ class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]):
         if uploads_dir:
             for file in new_files:
                 phys_path = uploads_dir / file["filename"]
-                file["outline"] = _extract_outline_for_file(phys_path)
+                outline, preview = _extract_outline_for_file(phys_path)
+                file["outline"] = outline
+                file["outline_preview"] = preview
 
         if not new_files and not historical_files:
             return None
diff --git a/backend/packages/harness/deerflow/utils/file_conversion.py b/backend/packages/harness/deerflow/utils/file_conversion.py
index 9a180883d..68755b675 100644
--- a/backend/packages/harness/deerflow/utils/file_conversion.py
+++ b/backend/packages/harness/deerflow/utils/file_conversion.py
@@ -182,6 +182,19 @@ async def convert_file_to_markdown(file_path: Path) -> Path | None:
 # by pymupdf4llm, so they don't need this pattern.
 _BOLD_HEADING_RE = re.compile(r"^\*\*((ITEM|PART|SECTION|SCHEDULE|EXHIBIT|APPENDIX|ANNEX|CHAPTER)\b[A-Z0-9 .,\-]*)\*\*\s*$")
 
+# Regex for split-bold headings produced by pymupdf4llm when a heading spans
+# multiple text spans in the PDF (e.g. section number and title are separate spans).
+# Matches lines like:  **1** **Introduction**  or  **3.2** **Multi-Head Attention**
+# Requirements:
+#   1. Entire line consists only of **...** blocks separated by whitespace (no prose)
+#   2. First block is a section number: a digit or uppercase letter followed by
+#      digits and dots (e.g. "1", "3.2", "A.1")
+#   3. Second block must not consist solely of digits/punctuation — excludes table
+#      headers like **2023** **2022** **2021** or **2023** **(2022)** while allowing
+#      non-ASCII titles such as **1** **概述** (negative lookahead instead of [A-Za-z])
+#   4. At most four blocks in total, each [^*]+ (no * inside): linear matching, no ReDoS
+_SPLIT_BOLD_HEADING_RE = re.compile(r"^\*\*[\dA-Z][\d.]*\*\*\s+\*\*(?![\d\s.,\-–—/:()%]+\*\*)[^*]+\*\*(?:\s+\*\*[^*]+\*\*){0,2}\s*$")
+
 # Maximum number of outline entries injected into the agent context.
 # Keeps prompt size bounded even for very long documents.
 MAX_OUTLINE_ENTRIES = 50
@@ -189,14 +202,43 @@ MAX_OUTLINE_ENTRIES = 50
 _ALLOWED_PDF_CONVERTERS = {"auto", "pymupdf4llm", "markitdown"}
 
 
+def _clean_bold_title(raw: str) -> str:
+    """Normalise a title string that may contain pymupdf4llm bold artefacts.
+
+    pymupdf4llm sometimes emits adjacent bold spans as ``**A** **B**`` instead
+    of a single ``**A B**`` block.  This helper merges those fragments and then
+    strips the ``**...**`` wrapper when one bold span covers the whole string.
+
+    Examples::
+
+        "**Overview**"                       → "Overview"
+        "**UNITED STATES** **SECURITIES**"   → "UNITED STATES SECURITIES"
+        "**A** and **B**"                    → "**A** and **B**"  (unchanged)
+    """
+    # Merge adjacent bold spans: "** **" → " "
+    merged = re.sub(r"\*\*\s*\*\*", " ", raw).strip()
+    # Unwrap only a single span; (?!\*\*) stops "**A** and **B**" becoming "A** and **B"
+    if m := re.fullmatch(r"\*\*((?:(?!\*\*).)+)\*\*", merged, re.DOTALL):
+        return m.group(1).strip()
+    return merged
+
+
 def extract_outline(md_path: Path) -> list[dict]:
     """Extract document outline (headings) from a Markdown file.
 
-    Recognises two heading styles produced by pymupdf4llm:
-    1. Standard Markdown headings: lines starting with one or more '#'
-    2. Bold-only structural headings: **ITEM 1. BUSINESS**, **PART II**, etc.
-       (SEC filings use bold+caps for section headings with the same font size
-       as body text, so pymupdf4llm cannot promote them to # headings)
+    Recognises three heading styles produced by pymupdf4llm:
+
+    1. Standard Markdown headings: lines starting with one or more '#'.
+       Inline ``**...**`` wrappers and adjacent bold spans (``** **``) are
+       cleaned so the title is plain text.
+
+    2. Bold-only structural headings: ``**ITEM 1. BUSINESS**``, ``**PART II**``,
+       etc.  SEC filings use bold+caps for section headings with the same font
+       size as body text, so pymupdf4llm cannot promote them to # headings.
+
+    3. Split-bold headings: ``**1** **Introduction**``, ``**3.2** **Attention**``.
+       pymupdf4llm emits these when the section number and title text are
+       separate spans in the underlying PDF (common in academic papers).
 
     Args:
         md_path: Path to the .md file.
@@ -218,19 +260,23 @@ def extract_outline(md_path: Path) -> list[dict]:
 
                 # Style 1: standard Markdown heading
                 if stripped.startswith("#"):
-                    title = stripped.lstrip("#").strip()
-                    # Strip any inline **...** wrapping (e.g. "## **Overview**" → "Overview")
+                    title = _clean_bold_title(stripped.lstrip("#").strip())
                     if title:
-                        if m2 := re.fullmatch(r"\*\*(.+?)\*\*", title):
-                            title = m2.group(1).strip()
                         outline.append({"title": title, "line": lineno})
 
-                # Style 2: bold-only line (entire line is **...**)
+                # Style 2: single bold block with SEC structural keyword
                 elif m := _BOLD_HEADING_RE.match(stripped):
                     title = m.group(1).strip()
                     if title:
                         outline.append({"title": title, "line": lineno})
 
+                # Style 3: split-bold heading — **<num>** **<title>**
+                # Regex already enforces max 4 blocks and non-numeric second block.
+                elif _SPLIT_BOLD_HEADING_RE.match(stripped):
+                    title = " ".join(re.findall(r"\*\*([^*]+)\*\*", stripped))
+                    if title:
+                        outline.append({"title": title, "line": lineno})
+
                 if len(outline) >= MAX_OUTLINE_ENTRIES:
                     outline.append({"truncated": True})
                     break
diff --git a/backend/tests/test_file_conversion.py b/backend/tests/test_file_conversion.py
index b2ad2d035..42abd3b4a 100644
--- a/backend/tests/test_file_conversion.py
+++ b/backend/tests/test_file_conversion.py
@@ -420,3 +420,40 @@ class TestExtractOutline:
         )
         outline = extract_outline(md)
         assert outline == []
+
+    def test_split_bold_heading_academic_paper(self, tmp_path):
+        """Style 3: numbered split-bold lines (``**<num>** **<title>**``) become outline entries."""
+        paper = tmp_path / "paper.md"
+        paper.write_text(
+            "## **Attention Is All You Need**\n\n**1** **Introduction**\n\nBody text.\n\n**2** **Background**\n\nMore text.\n\n**3.1** **Encoder and Decoder Stacks**\n",
+            encoding="utf-8",
+        )
+        found = {entry["title"] for entry in extract_outline(paper)}
+        # Every numbered section heading must be present, number and title joined by a space.
+        assert "1 Introduction" in found
+        assert "2 Background" in found
+        assert "3.1 Encoder and Decoder Stacks" in found
+
+    def test_split_bold_year_columns_excluded(self, tmp_path):
+        """A row of bold years (``**2023** **2022** **2021**``) must not be read as a heading."""
+        report = tmp_path / "annual.md"
+        report.write_text(
+            "# Financial Summary\n\n**2023** **2022** **2021**\n\nRevenue 100 90 80\n",
+            encoding="utf-8",
+        )
+        entries = extract_outline(report)
+        # The year-column row is dropped, leaving the real # heading as the sole entry.
+        seen_titles = [entry["title"] for entry in entries]
+        assert seen_titles == ["Financial Summary"]
+
+    def test_adjacent_bold_spans_merged_in_markdown_heading(self, tmp_path):
+        """A # heading split into adjacent bold spans yields one plain-text title."""
+        filing = tmp_path / "sec.md"
+        filing.write_text(
+            "## **UNITED STATES** **SECURITIES AND EXCHANGE COMMISSION**\n\nBody text.\n",
+            encoding="utf-8",
+        )
+        entries = extract_outline(filing)
+        assert len(entries) == 1
+        # No leftover ** markers: the two spans are joined by a single space.
+        assert entries[0]["title"] == "UNITED STATES SECURITIES AND EXCHANGE COMMISSION"
diff --git a/backend/tests/test_uploads_middleware_core_logic.py b/backend/tests/test_uploads_middleware_core_logic.py
index ebc9ab071..72639fb09 100644
--- a/backend/tests/test_uploads_middleware_core_logic.py
+++ b/backend/tests/test_uploads_middleware_core_logic.py
@@ -290,6 +290,7 @@ class TestBeforeAgent:
                 "path": "/mnt/user-data/uploads/notes.txt",
                 "extension": ".txt",
                 "outline": [],
+                "outline_preview": [],
             }
         ]
 
@@ -429,3 +430,41 @@ class TestBeforeAgent:
         content = result["messages"][-1].content
         assert "Chapter 1" in content
         assert "Chapter 2" in content
+
+    def test_fallback_preview_shown_when_outline_empty(self, tmp_path):
+        """A heading-less converted .md is summarised by its opening lines plus a grep hint."""
+        middleware = _middleware(tmp_path)
+        upload_root = _uploads_dir(tmp_path)
+        (upload_root / "report.pdf").write_bytes(b"%PDF fake")
+        # Sibling .md is plain prose: no # headings, so no outline can be built.
+        (upload_root / "report.md").write_text(
+            "Annual Financial Report 2024\n\nThis document summarises key findings.\n\nRevenue grew by 12%.\n",
+            encoding="utf-8",
+        )
+
+        upload = {"filename": "report.pdf", "size": 9, "path": "/mnt/user-data/uploads/report.pdf"}
+        outcome = middleware.before_agent(self._state(_human("analyse", files=[upload])), _runtime())
+
+        assert outcome is not None
+        rendered = outcome["messages"][-1].content
+        # The outline header is absent ...
+        assert "Document outline" not in rendered
+        # ... while the fallback preview and its lead-in line are present ...
+        assert "Annual Financial Report 2024" in rendered
+        assert "No structural headings detected" in rendered
+        # ... together with the grep hint.
+        assert "grep" in rendered
+
+    def test_fallback_grep_hint_shown_when_no_md_file(self, tmp_path):
+        """Files with no sibling .md get the grep hint but no outline or preview section."""
+        mw = _middleware(tmp_path)
+        uploads_dir = _uploads_dir(tmp_path)
+        (uploads_dir / "data.csv").write_bytes(b"a,b,c\n1,2,3\n")
+
+        msg = _human("analyse", files=[{"filename": "data.csv", "size": 12, "path": "/mnt/user-data/uploads/data.csv"}])
+        result = mw.before_agent(self._state(msg), _runtime())
+        assert result is not None
+        content = result["messages"][-1].content
+        assert "Document outline" not in content
+        assert "No structural headings detected" not in content
+        assert "grep" in content