feat(subtitle): 优化字幕功能并增加位置设置

- 调整音频设置的滑块范围和默认值
- 修复生成脚本短片功能的导入问题
- 优化字幕设置界面，增加字幕位置选项
- 改进字幕渲染逻辑，支持多种位置配置
- 调整视频设置中的原始音量滑块范围和默认值
2025-12-14 13:02:50 +00:00 · 2024-12-11 11:51:02 +08:00 · 2024-12-11 11:51:02 +08:00 · 11a4cf0900
commit 11a4cf0900
parent c065800072
7 changed files with 93 additions and 67 deletions
--- a/app/models/schema.py
+++ b/app/models/schema.py
@ -362,7 +362,7 @@ class VideoClipParams(BaseModel):
    stroke_width: float = 1.5                   # 描边宽度
    subtitle_position: str = "bottom"  # top, bottom, center, custom

-    n_threads: Optional[int] = Field(default=16, description="解说语音音量")    # 线程数，有助于提升视频处理速度
+    n_threads: Optional[int] = Field(default=16, description="解说语音音量")    # 线程数，有助于提升视频处理速度

    tts_volume: Optional[float] = Field(default=1.0, description="解说语音音量（后处理）")
    original_volume: Optional[float] = Field(default=1.0, description="视频原声音量")
@ -379,3 +379,10 @@ class VideoTranscriptionRequest(BaseModel):

 class VideoTranscriptionResponse(BaseModel):
    transcription: str
+
+
+class SubtitlePosition(str, Enum):
+    """Named subtitle placements; each member is also a plain ``str`` equal to its value."""
+    TOP = "top"
+    CENTER = "center"
+    BOTTOM = "bottom"
+
--- a/app/services/task.py
+++ b/app/services/task.py
@ -327,7 +327,7 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di
        'stroke_color': params.stroke_color,  # 描边颜色
        'stroke_width': params.stroke_width,  # 描边宽度, 范围0-10
        'bg_color': params.text_back_color,   # 半透明黑色背景
-        'position': ('center', 0.2),  # 距离顶部60%的位置
+        'position': (params.subtitle_position, 0.2),  # 距离顶部60%的位置
        'method': 'caption'  # 渲染方法
    }

--- a/app/services/video.py
+++ b/app/services/video.py
@ -16,7 +16,7 @@ from moviepy.editor import (
 )


-from app.models.schema import VideoAspect
+from app.models.schema import VideoAspect, SubtitlePosition


 def wrap_text(text, max_width, font, fontsize=60):
@ -171,7 +171,6 @@ def combine_clip_videos(combined_video_path: str,
        video_clip.write_videofile(
            filename=combined_video_path,
            threads=threads,
-            logger=None,
            audio_codec="aac",
            fps=30,
            temp_audiofile=os.path.join(output_dir, "temp-audio.m4a")
@ -248,16 +247,44 @@ def loop_audio_clip(audio_clip: AudioFileClip, target_duration: float) -> AudioF
    return extended_audio.subclip(0, target_duration)


+def calculate_subtitle_position(position, video_height: int, text_height: int = 0) -> tuple:
+    """
+    Compute the concrete placement of a subtitle within the video frame.
+    
+    Args:
+        position: Either a SubtitlePosition value, or an int/float that is multiplied by video_height to get the offset from the top
+        video_height: Height of the video
+        text_height: Height of the subtitle text (only used for the bottom placement)
+    
+    Returns:
+        tuple: ('center', y) -- x is always the string 'center', y is the computed vertical offset
+    """
+    margin = 50  # Gap kept between the subtitle and the top/bottom edge
+    
+    if isinstance(position, (int, float)):
+        # Numeric position: treated as a fraction of the video height
+        return ('center', int(video_height * position))
+    
+    # Preset positions
+    if position == SubtitlePosition.TOP:
+        return ('center', margin)
+    elif position == SubtitlePosition.CENTER:
+        # NOTE(review): text_height is not taken into account here, unlike the bottom branch -- confirm this is intended
+        return ('center', video_height // 2)
+    elif position == SubtitlePosition.BOTTOM:
+        return ('center', video_height - margin - text_height)
+    
+    # Any other value (anything that is neither numeric nor one of the presets above) falls back to the bottom placement
+    return ('center', video_height - margin - text_height)
+
+
 def generate_video_v3(
        video_path: str,
+        subtitle_style: dict,
        subtitle_path: Optional[str] = None,
        bgm_path: Optional[str] = None,
        narration_path: Optional[str] = None,
        output_path: str = "output.mp4",
-        # 音量相关参数
        volume_config: dict = None,
-        # 字幕相关参数
-        subtitle_style: dict = None,
        font_path: Optional[str] = None
 ) -> None:
    """
@ -280,7 +307,7 @@ def generate_video_v3(
            - stroke_color: 描边颜色
            - stroke_width: 描边宽度
            - bg_color: 背景色
-            - position: 位置支持 'top'/'center'/'bottom' 或 (x,y) 坐标
+            - position: 位置支持 SubtitlePosition 枚举值或 0-1 之间的浮点数（表示距顶部的百分比）
            - method: 文字渲染方法
        font_path: 字体文件路径（.ttf/.otf 等格式）
    """
@ -308,25 +335,7 @@ def generate_video_v3(
        if os.path.exists(subtitle_path):
            # 检查字体文件
            if font_path and not os.path.exists(font_path):
-                logger.info(f"警告：字体文件不存在: {font_path}，将使用系统默认字体")
-                font_path = 'Arial'
-
-            # 设置默认字幕样式
-            default_style = {
-                'font': font_path if font_path else 'Arial',
-                'fontsize': 24,
-                'color': 'white',
-                'stroke_color': 'black',
-                'stroke_width': 1,
-                'bg_color': None,
-                'position': ('center', 'bottom'),
-                'method': 'label'
-            }
-
-            if subtitle_style:
-                if font_path and 'font' not in subtitle_style:
-                    subtitle_style['font'] = font_path
-                default_style.update(subtitle_style)
+                logger.warning(f"警告：字体文件不存在: {font_path}")

            try:
                subs = pysrt.open(subtitle_path)
@ -354,32 +363,37 @@ def generate_video_v3(
                            logger.info(f"警告：第 {index + 1} 条字幕处理后为空，已跳过")
                            continue

-                        # 计算位置
-                        if isinstance(default_style['position'], tuple):
-                            pos_x, pos_y = default_style['position']
-                            if isinstance(pos_y, float):
-                                y_pos = int(video.h * pos_y)
-                                position = (pos_x, y_pos)
-                            else:
-                                position = default_style['position']
-                        else:
-                            position = default_style['position']
+                        # 创建临时 TextClip 来获取文本高度
+                        temp_clip = TextClip(
+                            subtitle_text,
+                            font=font_path,
+                            fontsize=subtitle_style['fontsize'],
+                            color=subtitle_style['color']
+                        )
+                        text_height = temp_clip.h
+                        temp_clip.close()

-                        # 创建基本的 TextClip
+                        # 计算字幕位置
+                        position = calculate_subtitle_position(
+                            subtitle_style['position'],
+                            video.h,
+                            text_height
+                        )
+
+                        # 创建最终的 TextClip
                        text_clip = (TextClip(
                            subtitle_text,
-                            font=default_style['font'],
-                            fontsize=default_style['fontsize'],
-                            color=default_style['color']
+                            font=font_path,
+                            fontsize=subtitle_style['fontsize'],
+                            color=subtitle_style['color']
                        )
-                                     .set_position(position)
-                                     .set_duration(end_time - start_time)
-                                     .set_start(start_time))
-
+                            .set_position(position)
+                            .set_duration(end_time - start_time)
+                            .set_start(start_time))
                        subtitle_clips.append(text_clip)

                    except Exception as e:
-                        logger.info(f"警告：创建第 {index + 1} 条字幕时出错: {str(e)}")
+                        logger.error(f"警告：创建第 {index + 1} 条字幕时出错: {traceback.format_exc()}")

                logger.info(f"成功创建 {len(subtitle_clips)} 条字幕剪辑")
            except Exception as e:
--- a/webui/components/audio_settings.py
+++ b/webui/components/audio_settings.py
@ -98,9 +98,9 @@ def render_voice_parameters(tr):
    voice_volume = st.slider(
        tr("Speech Volume"),
        min_value=0.0,
-        max_value=2.0,
+        max_value=1.0,
        value=1.0,
-        step=0.1,
+        step=0.01,
        help=tr("Adjust the volume of the original audio")
    )
    st.session_state['voice_volume'] = voice_volume
@ -191,9 +191,9 @@ def render_bgm_settings(tr):
    bgm_volume = st.slider(
        tr("Background Music Volume"),
        min_value=0.0,
-        max_value=2.0,
-        value=1.0,
-        step=0.1,
+        max_value=1.0,
+        value=0.3,
+        step=0.01,
        help=tr("Adjust the volume of the original audio")
    )
    st.session_state['bgm_volume'] = bgm_volume
--- a/webui/components/subtitle_settings.py
+++ b/webui/components/subtitle_settings.py
@ -3,26 +3,28 @@ from app.config import config
 from webui.utils.cache import get_fonts_cache
 import os

+
 def render_subtitle_panel(tr):
    """渲染字幕设置面板"""
    with st.container(border=True):
        st.write(tr("Subtitle Settings"))
-        
+
        # 启用字幕选项
        enable_subtitles = st.checkbox(tr("Enable Subtitles"), value=True)
        st.session_state['subtitle_enabled'] = enable_subtitles
-        
+
        if enable_subtitles:
            render_font_settings(tr)
            render_position_settings(tr)
            render_style_settings(tr)

+
 def render_font_settings(tr):
    """渲染字体设置"""
    # 获取字体列表
    font_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "resource", "fonts")
    font_names = get_fonts_cache(font_dir)
-    
+
    # 获取保存的字体设置
    saved_font_name = config.ui.get("font_name", "")
    saved_font_name_index = 0
@ -38,7 +40,7 @@ def render_font_settings(tr):
    config.ui["font_name"] = font_name
    st.session_state['font_name'] = font_name

-    # 字体大小
+    # 字体大小 和 字幕大小
    font_cols = st.columns([0.3, 0.7])
    with font_cols[0]:
        saved_text_fore_color = config.ui.get("text_fore_color", "#FFFFFF")
@ -53,13 +55,14 @@ def render_font_settings(tr):
        saved_font_size = config.ui.get("font_size", 60)
        font_size = st.slider(
            tr("Font Size"),
-            min_value=30,
+            min_value=20,
            max_value=100,
            value=saved_font_size
        )
        config.ui["font_size"] = font_size
        st.session_state['font_size'] = font_size

+
 def render_position_settings(tr):
    """渲染位置设置"""
    subtitle_positions = [
@ -68,14 +71,14 @@ def render_position_settings(tr):
        (tr("Bottom"), "bottom"),
        (tr("Custom"), "custom"),
    ]
-    
+
    selected_index = st.selectbox(
        tr("Position"),
        index=2,
        options=range(len(subtitle_positions)),
        format_func=lambda x: subtitle_positions[x][0],
    )
-    
+
    subtitle_position = subtitle_positions[selected_index][1]
    st.session_state['subtitle_position'] = subtitle_position

@ -94,27 +97,29 @@ def render_position_settings(tr):
        except ValueError:
            st.error(tr("Please enter a valid number"))

+
 def render_style_settings(tr):
    """渲染样式设置"""
    stroke_cols = st.columns([0.3, 0.7])
-    
+
    with stroke_cols[0]:
        stroke_color = st.color_picker(
            tr("Stroke Color"),
            value="#000000"
        )
        st.session_state['stroke_color'] = stroke_color
-    
+
    with stroke_cols[1]:
        stroke_width = st.slider(
            tr("Stroke Width"),
            min_value=0.0,
            max_value=10.0,
-            value=1.5,
-            step=0.1
+            value=1.0,
+            step=0.01
        )
        st.session_state['stroke_width'] = stroke_width

+
 def get_subtitle_params():
    """获取字幕参数"""
    return {
@ -126,4 +131,4 @@ def get_subtitle_params():
        'custom_position': st.session_state.get('custom_position', 70.0),
        'stroke_color': st.session_state.get('stroke_color', '#000000'),
        'stroke_width': st.session_state.get('stroke_width', 1.5),
-    } 
+    }
--- a/webui/components/video_settings.py
+++ b/webui/components/video_settings.py
@ -45,9 +45,9 @@ def render_video_config(tr, params):
    params.original_volume = st.slider(
        tr("Original Volume"),
        min_value=0.0,
-        max_value=2.0,
-        value=1.0,
-        step=0.1,
+        max_value=1.0,
+        value=0.7,
+        step=0.01,
        help=tr("Adjust the volume of the original audio")
    )

--- a/webui/tools/generate_script_short.py
+++ b/webui/tools/generate_script_short.py
@ -9,7 +9,6 @@ from loguru import logger

 from app.config import config
 from webui.tools.base import chekc_video_config
-from app.services.SDP.generate_script_short import generate_script


 def generate_script_short(tr, params):
@ -54,6 +53,7 @@ def generate_script_short(tr, params):
                "text_base_url": text_base_url or ""
            }
            chekc_video_config(api_params)
+            from app.services.SDP.generate_script_short import generate_script
            script = generate_script(
                srt_path=srt_path,
                output_path="resource/scripts/merged_subtitle.json",