diff --git a/app/config/config.py b/app/config/config.py
index 67bb88a..c8957a4 100644
--- a/app/config/config.py
+++ b/app/config/config.py
@@ -47,6 +47,7 @@ def load_config():
 def save_config():
     with open(config_file, "w", encoding="utf-8") as f:
         _cfg["app"] = app
+        _cfg["proxy"] = proxy
         _cfg["azure"] = azure
         _cfg["tencent"] = tencent
         _cfg["soulvoice"] = soulvoice
diff --git a/app/services/voice.py b/app/services/voice.py
index a1acf94..4e5e419 100644
--- a/app/services/voice.py
+++ b/app/services/voice.py
@@ -41,6 +41,42 @@ def mktimestamp(time_seconds: float) -> str:
     return f"{hours:02d}:{minutes:02d}:{seconds:06.3f}"
 
 
+def new_sub_maker() -> SubMaker:
+    """创建兼容新旧 edge-tts API 的 SubMaker。"""
+    sub_maker = SubMaker()
+    if not hasattr(sub_maker, "subs"):
+        sub_maker.subs = []
+    if not hasattr(sub_maker, "offset"):
+        sub_maker.offset = []
+    return sub_maker
+
+
+def add_subtitle_event(
+    sub_maker: SubMaker,
+    start_offset: int,
+    end_offset: int,
+    text: str,
+    boundary_type: str = "WordBoundary",
+) -> None:
+    """向 SubMaker 写入项目兼容的字幕事件。"""
+    if hasattr(sub_maker, "feed"):
+        duration = max(0, end_offset - start_offset)
+        try:
+            sub_maker.feed(
+                {
+                    "type": boundary_type,
+                    "offset": start_offset,
+                    "duration": duration,
+                    "text": text,
+                }
+            )
+        except Exception:
+            pass
+
+    sub_maker.subs.append(text)
+    sub_maker.offset.append((start_offset, end_offset))
+
+
 def get_all_azure_voices(filter_locals=None) -> list[str]:
     if filter_locals is None:
         filter_locals = ["zh-CN", "en-US", "zh-HK", "zh-TW", "vi-VN"]
@@ -1137,6 +1173,16 @@ def convert_pitch_to_percent(rate: float) -> str:
         return f"{percent}Hz"
 
 
+def get_edge_tts_proxy() -> str | None:
+    """返回 Edge TTS 应使用的代理地址。"""
+    proxy_enabled = config.proxy.get("enabled")
+    if proxy_enabled is False:
+        return None
+
+    proxy_url = (config.proxy.get("https") or config.proxy.get("http") or "").strip()
+    return proxy_url or None
+
+
 def azure_tts_v1(
     text: str, voice_name: str, voice_rate: float, voice_pitch: float, voice_file: str
 ) -> Union[SubMaker, None]:
@@ -1149,16 +1195,29 @@ def azure_tts_v1(
             logger.info(f"第 {i+1} 次使用 edge_tts 生成音频")
 
             async def _do() -> tuple[SubMaker, bytes]:
-                communicate = edge_tts.Communicate(text, voice_name, rate=rate_str, pitch=pitch_str, proxy=config.proxy.get("http"))
-                sub_maker = edge_tts.SubMaker()
+                communicate = edge_tts.Communicate(
+                    text,
+                    voice_name,
+                    rate=rate_str,
+                    pitch=pitch_str,
+                    boundary="WordBoundary",
+                    proxy=get_edge_tts_proxy(),
+                    connect_timeout=10,
+                    receive_timeout=60,
+                )
+                sub_maker = new_sub_maker()
                 audio_data = bytes()  # 用于存储音频数据
                 
                 async for chunk in communicate.stream():
                     if chunk["type"] == "audio":
                         audio_data += chunk["data"]
-                    elif chunk["type"] == "WordBoundary":
-                        sub_maker.create_sub(
-                            (chunk["offset"], chunk["duration"]), chunk["text"]
+                    elif chunk["type"] in {"WordBoundary", "SentenceBoundary"}:
+                        add_subtitle_event(
+                            sub_maker,
+                            start_offset=chunk["offset"],
+                            end_offset=chunk["offset"] + chunk["duration"],
+                            text=chunk["text"],
+                            boundary_type=chunk["type"],
                         )
                 return sub_maker, audio_data
 
@@ -1166,18 +1225,21 @@ def azure_tts_v1(
             sub_maker, audio_data = asyncio.run(_do())
             
             # 验证数据是否有效
-            if not sub_maker or not sub_maker.subs or not audio_data:
-                logger.warning(f"failed, invalid data generated")
+            if not audio_data:
+                logger.warning("failed, no audio data generated")
                 if i < 2:
                     time.sleep(1)
                 continue
 
+            if not sub_maker.subs:
+                logger.warning("edge_tts returned audio without boundary events; subtitle timing may be unavailable")
+
             # 数据有效，写入文件
             with open(voice_file, "wb") as file:
                 file.write(audio_data)
             return sub_maker
         except Exception as e:
-            logger.error(f"生成音频文件时出错: {str(e)}")
+            logger.exception(f"生成音频文件时出错: {type(e).__name__}: {str(e)}")
             if i < 2:
                 time.sleep(1)
     return None
@@ -1220,13 +1282,12 @@ def azure_tts_v2(text: str, voice_name: str, voice_file: str) -> Union[SubMaker,
         try:
             logger.info(f"start, voice name: {processed_voice_name}, try: {i + 1}")
 
-            sub_maker = SubMaker()
+            sub_maker = new_sub_maker()
 
             def speech_synthesizer_word_boundary_cb(evt: speechsdk.SessionEventArgs):
                 duration = _format_duration_to_offset(str(evt.duration))
                 offset = _format_duration_to_offset(evt.audio_offset)
-                sub_maker.subs.append(evt.text)
-                sub_maker.offset.append((offset, offset + duration))
+                add_subtitle_event(sub_maker, offset, offset + duration, evt.text)
 
             # Creates an instance of a speech config with specified subscription key and service region.
             speech_key = config.azure.get("speech_key", "")
@@ -1717,9 +1778,9 @@ def qwen3_tts(text: str, voice_name: str, voice_file: str, speed: float = 1.0) -
                 f.write(audio_bytes)
 
             # 估算字幕
-            sub = SubMaker()
+            sub = new_sub_maker()
             est_ms = max(800, int(len(text) * 180))
-            sub.create_sub((0, est_ms), text)
+            add_subtitle_event(sub, 0, est_ms, text)
             
             logger.info(f"Qwen3 TTS 生成成功（DashScope SDK），文件大小: {len(audio_bytes)} 字节")
             return sub
@@ -1811,18 +1872,18 @@ def tencent_tts(text: str, voice_name: str, voice_file: str, speed: float = 1.0)
                 f.write(audio_data)
 
             # 创建字幕对象
-            sub_maker = SubMaker()
+            sub_maker = new_sub_maker()
             if resp.Subtitles:
                 for sub in resp.Subtitles:
                     start_ms = sub.BeginTime
                     end_ms = sub.EndTime
                     text = sub.Text
                     # 转换为 100ns 单位
-                    sub_maker.create_sub((start_ms * 10000, end_ms * 10000), text)
+                    add_subtitle_event(sub_maker, start_ms * 10000, end_ms * 10000, text)
             else:
                 # 如果没有字幕返回，则使用估算作为后备方案
                 duration_ms = len(text) * 200
-                sub_maker.create_sub((0, duration_ms * 10000), text)
+                add_subtitle_event(sub_maker, 0, duration_ms * 10000, text)
 
             logger.info(f"腾讯云 TTS 生成成功，文件大小: {len(audio_data)} 字节")
             return sub_maker
@@ -1903,7 +1964,7 @@ def soulvoice_tts(text: str, voice_name: str, voice_file: str, speed: float = 1.
                 logger.info(f"SoulVoice TTS 成功生成音频: {voice_file}")
 
                 # SoulVoice 不支持精确字幕生成，返回简单的 SubMaker 对象
-                sub_maker = SubMaker()
+                sub_maker = new_sub_maker()
                 sub_maker.subs = [text]  # 整个文本作为一个段落
                 sub_maker.offset = [(0, 0)]  # 占位时间戳
 
@@ -2034,10 +2095,10 @@ def indextts2_tts(text: str, voice_name: str, voice_file: str, speed: float = 1.
                 logger.info(f"IndexTTS2 成功生成音频: {voice_file}, 大小: {len(response.content)} 字节")
 
                 # IndexTTS2 不支持精确字幕生成，返回简单的 SubMaker 对象
-                sub_maker = SubMaker()
+                sub_maker = new_sub_maker()
                 # 估算音频时长（基于文本长度）
                 estimated_duration_ms = max(1000, int(len(text) * 200))
-                sub_maker.create_sub((0, estimated_duration_ms * 10000), text)
+                add_subtitle_event(sub_maker, 0, estimated_duration_ms * 10000, text)
 
                 return sub_maker
 
@@ -2068,6 +2129,3 @@ def indextts2_tts(text: str, voice_name: str, voice_file: str, speed: float = 1.
 
     logger.error("IndexTTS2 TTS 生成失败，已达到最大重试次数")
     return None
-
-
-
diff --git a/requirements.txt b/requirements.txt
index 27ab39c..2d09a05 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
 # 核心依赖
 requests>=2.32.0
 moviepy==2.1.1
-edge-tts==6.1.19
+edge-tts==7.2.7
 streamlit>=1.45.0
 watchdog==6.0.0
 loguru>=0.7.3
@@ -35,4 +35,4 @@ tenacity>=9.0.0
 # 如果需要 CUDA 支持，取消注释下面的行
 # torch>=2.0.0
 # torchvision>=0.15.0
-# torchaudio>=2.0.0
\ No newline at end of file
+# torchaudio>=2.0.0
diff --git a/webui/components/basic_settings.py b/webui/components/basic_settings.py
index 83d642b..60ac96d 100644
--- a/webui/components/basic_settings.py
+++ b/webui/components/basic_settings.py
@@ -179,7 +179,7 @@ def render_proxy_settings(tr):
     proxy_enabled = st.checkbox(tr("Enable Proxy"), value=proxy_enabled)
     
     # 保存代理开关状态
-    # config.proxy["enabled"] = proxy_enabled
+    config.proxy["enabled"] = proxy_enabled
 
     # 只有在代理启用时才显示代理设置输入框
     if proxy_enabled:
@@ -196,8 +196,8 @@ def render_proxy_settings(tr):
         # 当代理被禁用时，清除环境变量和配置
         os.environ.pop("HTTP_PROXY", None)
         os.environ.pop("HTTPS_PROXY", None)
-        # config.proxy["http"] = ""
-        # config.proxy["https"] = ""
+        config.proxy["http"] = ""
+        config.proxy["https"] = ""
 
 
 def test_vision_model_connection(api_key, base_url, model_name, provider, tr):