feat(subtitle): 优化字幕功能并增加位置设置

- 调整音频设置的滑块范围和默认值
- 修复生成脚本短片功能的导入问题
- 优化字幕设置界面,增加字幕位置选项
- 改进字幕渲染逻辑,支持多种位置配置
- 调整视频设置中的原始音量滑块范围和默认值
This commit is contained in:
linyq 2024-12-11 11:51:02 +08:00
parent c065800072
commit 11a4cf0900
7 changed files with 93 additions and 67 deletions

View File

@ -362,7 +362,7 @@ class VideoClipParams(BaseModel):
stroke_width: float = 1.5 # 描边宽度 stroke_width: float = 1.5 # 描边宽度
subtitle_position: str = "bottom" # top, bottom, center, custom subtitle_position: str = "bottom" # top, bottom, center, custom
n_threads: Optional[int] = Field(default=16, description="解说语音音量") # 线程,有助于提升视频处理速度 n_threads: Optional[int] = Field(default=16, description="解说语音音量") # 线程数,有助于提升视频处理速度
tts_volume: Optional[float] = Field(default=1.0, description="解说语音音量(后处理)") tts_volume: Optional[float] = Field(default=1.0, description="解说语音音量(后处理)")
original_volume: Optional[float] = Field(default=1.0, description="视频原声音量") original_volume: Optional[float] = Field(default=1.0, description="视频原声音量")
@ -379,3 +379,10 @@ class VideoTranscriptionRequest(BaseModel):
class VideoTranscriptionResponse(BaseModel): class VideoTranscriptionResponse(BaseModel):
transcription: str transcription: str
class SubtitlePosition(str, Enum):
    """Named preset positions for subtitles.

    The ``str`` mixin makes every member compare equal to its plain string
    value (for example ``SubtitlePosition.TOP == "top"``).
    """

    TOP = "top"
    CENTER = "center"
    BOTTOM = "bottom"

View File

@ -327,7 +327,7 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di
'stroke_color': params.stroke_color, # 描边颜色 'stroke_color': params.stroke_color, # 描边颜色
'stroke_width': params.stroke_width, # 描边宽度, 范围0-10 'stroke_width': params.stroke_width, # 描边宽度, 范围0-10
'bg_color': params.text_back_color, # 半透明黑色背景 'bg_color': params.text_back_color, # 半透明黑色背景
'position': ('center', 0.2), # 距离顶部60%的位置 'position': (params.subtitle_position, 0.2), # 距离顶部60%的位置
'method': 'caption' # 渲染方法 'method': 'caption' # 渲染方法
} }

View File

@ -16,7 +16,7 @@ from moviepy.editor import (
) )
from app.models.schema import VideoAspect from app.models.schema import VideoAspect, SubtitlePosition
def wrap_text(text, max_width, font, fontsize=60): def wrap_text(text, max_width, font, fontsize=60):
@ -171,7 +171,6 @@ def combine_clip_videos(combined_video_path: str,
video_clip.write_videofile( video_clip.write_videofile(
filename=combined_video_path, filename=combined_video_path,
threads=threads, threads=threads,
logger=None,
audio_codec="aac", audio_codec="aac",
fps=30, fps=30,
temp_audiofile=os.path.join(output_dir, "temp-audio.m4a") temp_audiofile=os.path.join(output_dir, "temp-audio.m4a")
@ -248,16 +247,44 @@ def loop_audio_clip(audio_clip: AudioFileClip, target_duration: float) -> AudioF
return extended_audio.subclip(0, target_duration) return extended_audio.subclip(0, target_duration)
def calculate_subtitle_position(position, video_height: int, text_height: int = 0) -> tuple:
    """
    Compute the (x, y) placement of a subtitle inside the video frame.

    The y value is treated as the top edge of the subtitle clip, which is
    why the "center" and "bottom" presets subtract the text height.

    Args:
        position: Either a preset ("top" / "center" / "bottom", passed as a
            SubtitlePosition member or as its plain string value) or a number
            giving the distance from the top as a fraction of the video height.
        video_height: Height of the video.
        text_height: Height of the rendered subtitle text (defaults to 0).

    Returns:
        tuple: ('center', y) -- horizontally centered, y measured from the top.
    """
    margin = 50  # gap kept between the subtitle and the frame edge

    # Fractional position: distance from the top as a share of the height.
    if isinstance(position, (int, float)):
        return ('center', int(video_height * position))

    # Presets. SubtitlePosition mixes in str, so its members compare equal to
    # these literals; plain strings coming from the UI match as well.
    if position == "top":
        return ('center', margin)
    if position == "center":
        # Center the text itself rather than its top edge; with the default
        # text_height of 0 this is still video_height // 2.
        return ('center', (video_height - text_height) // 2)

    # "bottom" and any unrecognized value both fall back to the bottom.
    return ('center', video_height - margin - text_height)
def generate_video_v3( def generate_video_v3(
video_path: str, video_path: str,
subtitle_style: dict,
subtitle_path: Optional[str] = None, subtitle_path: Optional[str] = None,
bgm_path: Optional[str] = None, bgm_path: Optional[str] = None,
narration_path: Optional[str] = None, narration_path: Optional[str] = None,
output_path: str = "output.mp4", output_path: str = "output.mp4",
# 音量相关参数
volume_config: dict = None, volume_config: dict = None,
# 字幕相关参数
subtitle_style: dict = None,
font_path: Optional[str] = None font_path: Optional[str] = None
) -> None: ) -> None:
""" """
@ -280,7 +307,7 @@ def generate_video_v3(
- stroke_color: 描边颜色 - stroke_color: 描边颜色
- stroke_width: 描边宽度 - stroke_width: 描边宽度
- bg_color: 背景色 - bg_color: 背景色
- position: 位置支持 'top'/'center'/'bottom' (x,y) 坐标 - position: 位置支持 SubtitlePosition 枚举值或 0-1 之间的浮点数表示距顶部的百分比
- method: 文字渲染方法 - method: 文字渲染方法
font_path: 字体文件路径.ttf/.otf 等格式 font_path: 字体文件路径.ttf/.otf 等格式
""" """
@ -308,25 +335,7 @@ def generate_video_v3(
if os.path.exists(subtitle_path): if os.path.exists(subtitle_path):
# 检查字体文件 # 检查字体文件
if font_path and not os.path.exists(font_path): if font_path and not os.path.exists(font_path):
logger.info(f"警告:字体文件不存在: {font_path},将使用系统默认字体") logger.warning(f"警告:字体文件不存在: {font_path}")
font_path = 'Arial'
# 设置默认字幕样式
default_style = {
'font': font_path if font_path else 'Arial',
'fontsize': 24,
'color': 'white',
'stroke_color': 'black',
'stroke_width': 1,
'bg_color': None,
'position': ('center', 'bottom'),
'method': 'label'
}
if subtitle_style:
if font_path and 'font' not in subtitle_style:
subtitle_style['font'] = font_path
default_style.update(subtitle_style)
try: try:
subs = pysrt.open(subtitle_path) subs = pysrt.open(subtitle_path)
@ -354,32 +363,37 @@ def generate_video_v3(
logger.info(f"警告:第 {index + 1} 条字幕处理后为空,已跳过") logger.info(f"警告:第 {index + 1} 条字幕处理后为空,已跳过")
continue continue
# 计算位置 # 创建临时 TextClip 来获取文本高度
if isinstance(default_style['position'], tuple): temp_clip = TextClip(
pos_x, pos_y = default_style['position'] subtitle_text,
if isinstance(pos_y, float): font=font_path,
y_pos = int(video.h * pos_y) fontsize=subtitle_style['fontsize'],
position = (pos_x, y_pos) color=subtitle_style['color']
else: )
position = default_style['position'] text_height = temp_clip.h
else: temp_clip.close()
position = default_style['position']
# 创建基本的 TextClip # 计算字幕位置
position = calculate_subtitle_position(
subtitle_style['position'],
video.h,
text_height
)
# 创建最终的 TextClip
text_clip = (TextClip( text_clip = (TextClip(
subtitle_text, subtitle_text,
font=default_style['font'], font=font_path,
fontsize=default_style['fontsize'], fontsize=subtitle_style['fontsize'],
color=default_style['color'] color=subtitle_style['color']
) )
.set_position(position) .set_position(position)
.set_duration(end_time - start_time) .set_duration(end_time - start_time)
.set_start(start_time)) .set_start(start_time))
subtitle_clips.append(text_clip) subtitle_clips.append(text_clip)
except Exception as e: except Exception as e:
logger.info(f"警告:创建第 {index + 1} 条字幕时出错: {str(e)}") logger.error(f"警告:创建第 {index + 1} 条字幕时出错: {traceback.format_exc()}")
logger.info(f"成功创建 {len(subtitle_clips)} 条字幕剪辑") logger.info(f"成功创建 {len(subtitle_clips)} 条字幕剪辑")
except Exception as e: except Exception as e:

View File

@ -98,9 +98,9 @@ def render_voice_parameters(tr):
voice_volume = st.slider( voice_volume = st.slider(
tr("Speech Volume"), tr("Speech Volume"),
min_value=0.0, min_value=0.0,
max_value=2.0, max_value=1.0,
value=1.0, value=1.0,
step=0.1, step=0.01,
help=tr("Adjust the volume of the original audio") help=tr("Adjust the volume of the original audio")
) )
st.session_state['voice_volume'] = voice_volume st.session_state['voice_volume'] = voice_volume
@ -191,9 +191,9 @@ def render_bgm_settings(tr):
bgm_volume = st.slider( bgm_volume = st.slider(
tr("Background Music Volume"), tr("Background Music Volume"),
min_value=0.0, min_value=0.0,
max_value=2.0, max_value=1.0,
value=1.0, value=0.3,
step=0.1, step=0.01,
help=tr("Adjust the volume of the original audio") help=tr("Adjust the volume of the original audio")
) )
st.session_state['bgm_volume'] = bgm_volume st.session_state['bgm_volume'] = bgm_volume

View File

@ -3,26 +3,28 @@ from app.config import config
from webui.utils.cache import get_fonts_cache from webui.utils.cache import get_fonts_cache
import os import os
def render_subtitle_panel(tr): def render_subtitle_panel(tr):
"""渲染字幕设置面板""" """渲染字幕设置面板"""
with st.container(border=True): with st.container(border=True):
st.write(tr("Subtitle Settings")) st.write(tr("Subtitle Settings"))
# 启用字幕选项 # 启用字幕选项
enable_subtitles = st.checkbox(tr("Enable Subtitles"), value=True) enable_subtitles = st.checkbox(tr("Enable Subtitles"), value=True)
st.session_state['subtitle_enabled'] = enable_subtitles st.session_state['subtitle_enabled'] = enable_subtitles
if enable_subtitles: if enable_subtitles:
render_font_settings(tr) render_font_settings(tr)
render_position_settings(tr) render_position_settings(tr)
render_style_settings(tr) render_style_settings(tr)
def render_font_settings(tr): def render_font_settings(tr):
"""渲染字体设置""" """渲染字体设置"""
# 获取字体列表 # 获取字体列表
font_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "resource", "fonts") font_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "resource", "fonts")
font_names = get_fonts_cache(font_dir) font_names = get_fonts_cache(font_dir)
# 获取保存的字体设置 # 获取保存的字体设置
saved_font_name = config.ui.get("font_name", "") saved_font_name = config.ui.get("font_name", "")
saved_font_name_index = 0 saved_font_name_index = 0
@ -38,7 +40,7 @@ def render_font_settings(tr):
config.ui["font_name"] = font_name config.ui["font_name"] = font_name
st.session_state['font_name'] = font_name st.session_state['font_name'] = font_name
# 字体大小 # 字体大小 和 字幕大小
font_cols = st.columns([0.3, 0.7]) font_cols = st.columns([0.3, 0.7])
with font_cols[0]: with font_cols[0]:
saved_text_fore_color = config.ui.get("text_fore_color", "#FFFFFF") saved_text_fore_color = config.ui.get("text_fore_color", "#FFFFFF")
@ -53,13 +55,14 @@ def render_font_settings(tr):
saved_font_size = config.ui.get("font_size", 60) saved_font_size = config.ui.get("font_size", 60)
font_size = st.slider( font_size = st.slider(
tr("Font Size"), tr("Font Size"),
min_value=30, min_value=20,
max_value=100, max_value=100,
value=saved_font_size value=saved_font_size
) )
config.ui["font_size"] = font_size config.ui["font_size"] = font_size
st.session_state['font_size'] = font_size st.session_state['font_size'] = font_size
def render_position_settings(tr): def render_position_settings(tr):
"""渲染位置设置""" """渲染位置设置"""
subtitle_positions = [ subtitle_positions = [
@ -68,14 +71,14 @@ def render_position_settings(tr):
(tr("Bottom"), "bottom"), (tr("Bottom"), "bottom"),
(tr("Custom"), "custom"), (tr("Custom"), "custom"),
] ]
selected_index = st.selectbox( selected_index = st.selectbox(
tr("Position"), tr("Position"),
index=2, index=2,
options=range(len(subtitle_positions)), options=range(len(subtitle_positions)),
format_func=lambda x: subtitle_positions[x][0], format_func=lambda x: subtitle_positions[x][0],
) )
subtitle_position = subtitle_positions[selected_index][1] subtitle_position = subtitle_positions[selected_index][1]
st.session_state['subtitle_position'] = subtitle_position st.session_state['subtitle_position'] = subtitle_position
@ -94,27 +97,29 @@ def render_position_settings(tr):
except ValueError: except ValueError:
st.error(tr("Please enter a valid number")) st.error(tr("Please enter a valid number"))
def render_style_settings(tr): def render_style_settings(tr):
"""渲染样式设置""" """渲染样式设置"""
stroke_cols = st.columns([0.3, 0.7]) stroke_cols = st.columns([0.3, 0.7])
with stroke_cols[0]: with stroke_cols[0]:
stroke_color = st.color_picker( stroke_color = st.color_picker(
tr("Stroke Color"), tr("Stroke Color"),
value="#000000" value="#000000"
) )
st.session_state['stroke_color'] = stroke_color st.session_state['stroke_color'] = stroke_color
with stroke_cols[1]: with stroke_cols[1]:
stroke_width = st.slider( stroke_width = st.slider(
tr("Stroke Width"), tr("Stroke Width"),
min_value=0.0, min_value=0.0,
max_value=10.0, max_value=10.0,
value=1.5, value=1.0,
step=0.1 step=0.01
) )
st.session_state['stroke_width'] = stroke_width st.session_state['stroke_width'] = stroke_width
def get_subtitle_params(): def get_subtitle_params():
"""获取字幕参数""" """获取字幕参数"""
return { return {
@ -126,4 +131,4 @@ def get_subtitle_params():
'custom_position': st.session_state.get('custom_position', 70.0), 'custom_position': st.session_state.get('custom_position', 70.0),
'stroke_color': st.session_state.get('stroke_color', '#000000'), 'stroke_color': st.session_state.get('stroke_color', '#000000'),
'stroke_width': st.session_state.get('stroke_width', 1.5), 'stroke_width': st.session_state.get('stroke_width', 1.5),
} }

View File

@ -45,9 +45,9 @@ def render_video_config(tr, params):
params.original_volume = st.slider( params.original_volume = st.slider(
tr("Original Volume"), tr("Original Volume"),
min_value=0.0, min_value=0.0,
max_value=2.0, max_value=1.0,
value=1.0, value=0.7,
step=0.1, step=0.01,
help=tr("Adjust the volume of the original audio") help=tr("Adjust the volume of the original audio")
) )

View File

@ -9,7 +9,6 @@ from loguru import logger
from app.config import config from app.config import config
from webui.tools.base import chekc_video_config from webui.tools.base import chekc_video_config
from app.services.SDP.generate_script_short import generate_script
def generate_script_short(tr, params): def generate_script_short(tr, params):
@ -54,6 +53,7 @@ def generate_script_short(tr, params):
"text_base_url": text_base_url or "" "text_base_url": text_base_url or ""
} }
chekc_video_config(api_params) chekc_video_config(api_params)
from app.services.SDP.generate_script_short import generate_script
script = generate_script( script = generate_script(
srt_path=srt_path, srt_path=srt_path,
output_path="resource/scripts/merged_subtitle.json", output_path="resource/scripts/merged_subtitle.json",