diff --git a/app/models/schema.py b/app/models/schema.py index 9fc62b6..5e2e909 100644 --- a/app/models/schema.py +++ b/app/models/schema.py @@ -362,7 +362,7 @@ class VideoClipParams(BaseModel): stroke_width: float = 1.5 # 描边宽度 subtitle_position: str = "bottom" # top, bottom, center, custom - n_threads: Optional[int] = Field(default=16, description="解说语音音量") # 线程数,有助于提升视频处理速度 + n_threads: Optional[int] = Field(default=16, description="解说语音音量") # 线程���,有助于提升视频处理速度 tts_volume: Optional[float] = Field(default=1.0, description="解说语音音量(后处理)") original_volume: Optional[float] = Field(default=1.0, description="视频原声音量") @@ -379,3 +379,10 @@ class VideoTranscriptionRequest(BaseModel): class VideoTranscriptionResponse(BaseModel): transcription: str + + +class SubtitlePosition(str, Enum): + TOP = "top" + CENTER = "center" + BOTTOM = "bottom" + diff --git a/app/services/task.py b/app/services/task.py index c11907d..bde85cd 100644 --- a/app/services/task.py +++ b/app/services/task.py @@ -327,7 +327,7 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di 'stroke_color': params.stroke_color, # 描边颜色 'stroke_width': params.stroke_width, # 描边宽度, 范围0-10 'bg_color': params.text_back_color, # 半透明黑色背景 - 'position': ('center', 0.2), # 距离顶部60%的位置 + 'position': (params.subtitle_position, 0.2), # 距离顶部60%的位置 'method': 'caption' # 渲染方法 } diff --git a/app/services/video.py b/app/services/video.py index 7037aaa..aab38b8 100644 --- a/app/services/video.py +++ b/app/services/video.py @@ -16,7 +16,7 @@ from moviepy.editor import ( ) -from app.models.schema import VideoAspect +from app.models.schema import VideoAspect, SubtitlePosition def wrap_text(text, max_width, font, fontsize=60): @@ -171,7 +171,6 @@ def combine_clip_videos(combined_video_path: str, video_clip.write_videofile( filename=combined_video_path, threads=threads, - logger=None, audio_codec="aac", fps=30, temp_audiofile=os.path.join(output_dir, "temp-audio.m4a") @@ -248,16 +247,44 @@ def loop_audio_clip(audio_clip: AudioFileClip, target_duration: float) -> AudioF return extended_audio.subclip(0, target_duration) +def calculate_subtitle_position(position, video_height: int, text_height: int = 0) -> tuple: + """ + 计算字幕在视频中的具体位置 + + Args: + position: 位置配置,可以是 SubtitlePosition 枚举值或表示距顶部百分比的浮点数 + video_height: 视频高度 + text_height: 字幕文本高度 + + Returns: + tuple: (x, y) 坐标 + """ + margin = 50 # 字幕距离边缘的边距 + + if isinstance(position, (int, float)): + # 百分比位置 + return ('center', int(video_height * position)) + + # 预设位置 + if position == SubtitlePosition.TOP: + return ('center', margin) + elif position == SubtitlePosition.CENTER: + return ('center', video_height // 2) + elif position == SubtitlePosition.BOTTOM: + return ('center', video_height - margin - text_height) + + # 默认底部 + return ('center', video_height - margin - text_height) + + def generate_video_v3( video_path: str, + subtitle_style: dict, subtitle_path: Optional[str] = None, bgm_path: Optional[str] = None, narration_path: Optional[str] = None, output_path: str = "output.mp4", - # 音量相关参数 volume_config: dict = None, - # 字幕相关参数 - subtitle_style: dict = None, font_path: Optional[str] = None ) -> None: """ @@ -280,7 +307,7 @@ def generate_video_v3( - stroke_color: 描边颜色 - stroke_width: 描边宽度 - bg_color: 背景色 - - position: 位置支持 'top'/'center'/'bottom' 或 (x,y) 坐标 + - position: 位置支持 SubtitlePosition 枚举值或 0-1 之间的浮点数(表示距顶部的百分比) - method: 文字渲染方法 font_path: 字体文件路径(.ttf/.otf 等格式) """ @@ -308,25 +335,7 @@ def generate_video_v3( if os.path.exists(subtitle_path): # 检查字体文件 if font_path and not os.path.exists(font_path): - logger.info(f"警告:字体文件不存在: {font_path},将使用系统默认字体") - font_path = 'Arial' - - # 设置默认字幕样式 - default_style = { - 'font': font_path if font_path else 'Arial', - 'fontsize': 24, - 'color': 'white', - 'stroke_color': 'black', - 'stroke_width': 1, - 'bg_color': None, - 'position': ('center', 'bottom'), - 'method': 'label' - } - - if subtitle_style: - if font_path and 'font' not in subtitle_style: - subtitle_style['font'] = font_path - default_style.update(subtitle_style) + logger.warning(f"警告:字体文件不存在: {font_path}") try: subs = pysrt.open(subtitle_path) @@ -354,32 +363,37 @@ def generate_video_v3( logger.info(f"警告:第 {index + 1} 条字幕处理后为空,已跳过") continue - # 计算位置 - if isinstance(default_style['position'], tuple): - pos_x, pos_y = default_style['position'] - if isinstance(pos_y, float): - y_pos = int(video.h * pos_y) - position = (pos_x, y_pos) - else: - position = default_style['position'] - else: - position = default_style['position'] + # 创建临时 TextClip 来获取文本高度 + temp_clip = TextClip( + subtitle_text, + font=font_path, + fontsize=subtitle_style['fontsize'], + color=subtitle_style['color'] + ) + text_height = temp_clip.h + temp_clip.close() - # 创建基本的 TextClip + # 计算字幕位置 + position = calculate_subtitle_position( + subtitle_style['position'], + video.h, + text_height + ) + + # 创建最终的 TextClip text_clip = (TextClip( subtitle_text, - font=default_style['font'], - fontsize=default_style['fontsize'], - color=default_style['color'] + font=font_path, + fontsize=subtitle_style['fontsize'], + color=subtitle_style['color'] ) - .set_position(position) - .set_duration(end_time - start_time) - .set_start(start_time)) - + .set_position(position) + .set_duration(end_time - start_time) + .set_start(start_time)) subtitle_clips.append(text_clip) except Exception as e: - logger.info(f"警告:创建第 {index + 1} 条字幕时出错: {str(e)}") + logger.error(f"警告:创建第 {index + 1} 条字幕时出错: {traceback.format_exc()}") logger.info(f"成功创建 {len(subtitle_clips)} 条字幕剪辑") except Exception as e: diff --git a/webui/components/audio_settings.py b/webui/components/audio_settings.py index 1c49af4..0d868c4 100644 --- a/webui/components/audio_settings.py +++ b/webui/components/audio_settings.py @@ -98,9 +98,9 @@ def render_voice_parameters(tr): voice_volume = st.slider( tr("Speech Volume"), min_value=0.0, - max_value=2.0, + max_value=1.0, value=1.0, - step=0.1, + step=0.01, help=tr("Adjust the volume of the original audio") ) st.session_state['voice_volume'] = voice_volume @@ -191,9 +191,9 @@ def render_bgm_settings(tr): bgm_volume = st.slider( tr("Background Music Volume"), min_value=0.0, - max_value=2.0, - value=1.0, - step=0.1, + max_value=1.0, + value=0.3, + step=0.01, help=tr("Adjust the volume of the original audio") ) st.session_state['bgm_volume'] = bgm_volume diff --git a/webui/components/subtitle_settings.py b/webui/components/subtitle_settings.py index ba9d2c7..cb624dc 100644 --- a/webui/components/subtitle_settings.py +++ b/webui/components/subtitle_settings.py @@ -3,26 +3,28 @@ from app.config import config from webui.utils.cache import get_fonts_cache import os + def render_subtitle_panel(tr): """渲染字幕设置面板""" with st.container(border=True): st.write(tr("Subtitle Settings")) - + # 启用字幕选项 enable_subtitles = st.checkbox(tr("Enable Subtitles"), value=True) st.session_state['subtitle_enabled'] = enable_subtitles - + if enable_subtitles: render_font_settings(tr) render_position_settings(tr) render_style_settings(tr) + def render_font_settings(tr): """渲染字体设置""" # 获取字体列表 font_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "resource", "fonts") font_names = get_fonts_cache(font_dir) - + # 获取保存的字体设置 saved_font_name = config.ui.get("font_name", "") saved_font_name_index = 0 @@ -38,7 +40,7 @@ def render_font_settings(tr): config.ui["font_name"] = font_name st.session_state['font_name'] = font_name - # 字体大小 + # 字体大小 和 字幕大小 font_cols = st.columns([0.3, 0.7]) with font_cols[0]: saved_text_fore_color = config.ui.get("text_fore_color", "#FFFFFF") @@ -53,13 +55,14 @@ def render_font_settings(tr): saved_font_size = config.ui.get("font_size", 60) font_size = st.slider( tr("Font Size"), - min_value=30, + min_value=20, max_value=100, value=saved_font_size ) config.ui["font_size"] = font_size st.session_state['font_size'] = font_size + def render_position_settings(tr): """渲染位置设置""" subtitle_positions = [ @@ -68,14 +71,14 @@ def render_position_settings(tr): (tr("Bottom"), "bottom"), (tr("Custom"), "custom"), ] - + selected_index = st.selectbox( tr("Position"), index=2, options=range(len(subtitle_positions)), format_func=lambda x: subtitle_positions[x][0], ) - + subtitle_position = subtitle_positions[selected_index][1] st.session_state['subtitle_position'] = subtitle_position @@ -94,27 +97,29 @@ def render_position_settings(tr): except ValueError: st.error(tr("Please enter a valid number")) + def render_style_settings(tr): """渲染样式设置""" stroke_cols = st.columns([0.3, 0.7]) - + with stroke_cols[0]: stroke_color = st.color_picker( tr("Stroke Color"), value="#000000" ) st.session_state['stroke_color'] = stroke_color - + with stroke_cols[1]: stroke_width = st.slider( tr("Stroke Width"), min_value=0.0, max_value=10.0, - value=1.5, - step=0.1 + value=1.0, + step=0.01 ) st.session_state['stroke_width'] = stroke_width + def get_subtitle_params(): """获取字幕参数""" return { @@ -126,4 +131,4 @@ def get_subtitle_params(): 'custom_position': st.session_state.get('custom_position', 70.0), 'stroke_color': st.session_state.get('stroke_color', '#000000'), 'stroke_width': st.session_state.get('stroke_width', 1.5), - } \ No newline at end of file + } diff --git a/webui/components/video_settings.py b/webui/components/video_settings.py index 7d9cc67..695c9e7 100644 --- a/webui/components/video_settings.py +++ b/webui/components/video_settings.py @@ -45,9 +45,9 @@ def render_video_config(tr, params): params.original_volume = st.slider( tr("Original Volume"), min_value=0.0, - max_value=2.0, - value=1.0, - step=0.1, + max_value=1.0, + value=0.7, + step=0.01, help=tr("Adjust the volume of the original audio") ) diff --git a/webui/tools/generate_script_short.py b/webui/tools/generate_script_short.py index 31c9982..5400ff1 100644 --- a/webui/tools/generate_script_short.py +++ b/webui/tools/generate_script_short.py @@ -9,7 +9,6 @@ from loguru import logger from app.config import config from webui.tools.base import chekc_video_config -from app.services.SDP.generate_script_short import generate_script def generate_script_short(tr, params): @@ -54,6 +53,7 @@ def generate_script_short(tr, params): "text_base_url": text_base_url or "" } chekc_video_config(api_params) + from app.services.SDP.generate_script_short import generate_script script = generate_script( srt_path=srt_path, output_path="resource/scripts/merged_subtitle.json",