mirror of
https://github.com/linyqh/NarratoAI.git
synced 2026-06-21 06:42:03 +00:00
- 为VideoClipParams新增原字幕路径配置字段,支持单条/多条字幕路径 - 完善webui参数获取逻辑,处理字幕路径兼容性并对接前端选择 - 重构后端字幕处理流程,支持自动匹配视频对应原字幕,合并原声字幕 - 优化视频合并逻辑,新增ffmpeg无损copy合并判断,自动回退重编码提升效率 - 新增ffmpeg快速素材合并路径,支持自定义字幕样式与多音轨混合 - 新增多个单元测试覆盖字幕匹配、合并及视频合并场景
234 lines
8.2 KiB
Python
234 lines
8.2 KiB
Python
import warnings
|
||
from enum import Enum
|
||
from typing import Any, List, Optional, Union
|
||
|
||
import pydantic
|
||
from pydantic import BaseModel, Field
|
||
|
||
# 忽略 Pydantic 的特定警告
|
||
warnings.filterwarnings(
|
||
"ignore",
|
||
category=UserWarning,
|
||
message="Field name.*shadows an attribute in parent.*",
|
||
)
|
||
|
||
|
||
class AudioVolumeDefaults:
    """Default audio volume constants, kept in one place for global consistency."""

    # Default volumes for the voice tracks.
    VOICE_VOLUME: float = 1.0
    TTS_VOLUME: float = 1.0

    # Default volume for the original audio; raised so it balances against TTS.
    ORIGINAL_VOLUME: float = 1.2

    # Default volume for background music.
    BGM_VOLUME: float = 0.3

    # Volume range.
    MIN_VOLUME: float = 0.0
    MAX_VOLUME: float = 2.0  # Lets the original audio exceed 1.0 to balance TTS.

    # Smart volume adjustment.
    ENABLE_SMART_VOLUME: bool = True  # Whether smart volume analysis/adjustment is on.
|
||
|
||
|
||
class VideoConcatMode(str, Enum):
    """Ways of ordering clips when concatenating; each member equals its string value."""

    random = "random"
    sequential = "sequential"
|
||
|
||
|
||
class VideoAspect(str, Enum):
    """Supported video aspect ratios; each member equals its "W:H" string value."""

    landscape = "16:9"
    landscape_2 = "4:3"
    portrait = "9:16"
    portrait_2 = "3:4"
    square = "1:1"

    def to_resolution(self) -> tuple:
        """Return the ``(width, height)`` pixel resolution for this aspect ratio.

        Every ratio is sized so that its short side is 1080 pixels. The 4:3 and
        3:4 members previously had no branch of their own and fell through to
        the portrait 1080x1920 default, which is wrong for a landscape 4:3
        aspect; they now map to 1440x1080 and 1080x1440 respectively.

        Comparisons are made against the plain string values, so the method
        also works when invoked with a bare string in place of a member.

        Returns:
            A ``(width, height)`` tuple of ints. Unrecognised values fall back
            to the portrait resolution ``(1080, 1920)``.
        """
        if self == VideoAspect.landscape.value:
            return 1920, 1080
        if self == VideoAspect.landscape_2.value:
            return 1440, 1080
        if self == VideoAspect.portrait.value:
            return 1080, 1920
        if self == VideoAspect.portrait_2.value:
            return 1080, 1440
        if self == VideoAspect.square.value:
            return 1080, 1080
        # Fallback kept from the original implementation.
        return 1080, 1920
|
||
|
||
|
||
class _Config:
    """Pydantic configuration handed to the ``MaterialInfo`` dataclass decorator."""

    # Pydantic option: permit field types that Pydantic has no validator for.
    arbitrary_types_allowed = True
|
||
|
||
|
||
@pydantic.dataclasses.dataclass(config=_Config)
class MaterialInfo:
    """Description of a single piece of source video material."""

    # Name of the material provider.
    provider: str = "pexels"
    # Location of the material.
    url: str = ""
    # Length of the material; the unit is not stated here (presumably seconds — confirm).
    duration: int = 0
|
||
|
||
|
||
# VoiceNames = [
#     # zh-CN
#     "female-zh-CN-XiaoxiaoNeural",
#     "female-zh-CN-XiaoyiNeural",
#     "female-zh-CN-liaoning-XiaobeiNeural",
#     "female-zh-CN-shaanxi-XiaoniNeural",
#
#     "male-zh-CN-YunjianNeural",
#     "male-zh-CN-YunxiNeural",
#     "male-zh-CN-YunxiaNeural",
#     "male-zh-CN-YunyangNeural",
#
#     # "female-zh-HK-HiuGaaiNeural",
#     # "female-zh-HK-HiuMaanNeural",
#     # "male-zh-HK-WanLungNeural",
#     #
#     # "female-zh-TW-HsiaoChenNeural",
#     # "female-zh-TW-HsiaoYuNeural",
#     # "male-zh-TW-YunJheNeural",
#
#     # en-US
#     "female-en-US-AnaNeural",
#     "female-en-US-AriaNeural",
#     "female-en-US-AvaNeural",
#     "female-en-US-EmmaNeural",
#     "female-en-US-JennyNeural",
#     "female-en-US-MichelleNeural",
#
#     "male-en-US-AndrewNeural",
#     "male-en-US-BrianNeural",
#     "male-en-US-ChristopherNeural",
#     "male-en-US-EricNeural",
#     "male-en-US-GuyNeural",
#     "male-en-US-RogerNeural",
#     "male-en-US-SteffanNeural",
# ]
|
||
|
||
|
||
class VideoParams(BaseModel):
    """Parameters describing a video generation request.

    Illustrative payload (carried over from the original docstring; its keys
    do not all correspond one-to-one to the fields declared below):

        {
            "video_subject": "",
            "video_aspect": "Landscape 16:9 (Xigua Video)",
            "voice_name": "Female - Xiaoxiao",
            "bgm_name": "random",
            "font_name": "STHeitiMedium Heiti-Medium",
            "text_color": "#FFFFFF",
            "font_size": 60,
            "stroke_color": "#000000",
            "stroke_width": 1.5
        }
    """

    # --- Subject, script and clip assembly ---
    video_subject: str
    video_script: str = ""  # Script used to generate the video.
    video_terms: Optional[Union[str, list]] = None  # Keywords used to generate the video.
    video_aspect: Optional[VideoAspect] = VideoAspect.portrait.value
    video_concat_mode: Optional[VideoConcatMode] = VideoConcatMode.random.value
    video_clip_duration: Optional[int] = 5
    video_count: Optional[int] = 1

    # --- Source material ---
    video_source: Optional[str] = "pexels"
    video_materials: Optional[List[MaterialInfo]] = None  # Materials used to generate the video.

    video_language: Optional[str] = ""  # auto detect

    # --- Voice and background music ---
    voice_name: Optional[str] = ""
    voice_volume: Optional[float] = AudioVolumeDefaults.VOICE_VOLUME
    voice_rate: Optional[float] = 1.0
    bgm_type: Optional[str] = "random"
    bgm_file: Optional[str] = ""
    bgm_volume: Optional[float] = AudioVolumeDefaults.BGM_VOLUME

    # --- Subtitles ---
    subtitle_enabled: Optional[bool] = True
    subtitle_position: Optional[str] = "bottom"  # top, bottom, center
    custom_position: float = 70.0
    font_name: Optional[str] = "STHeitiMedium.ttc"
    text_fore_color: Optional[str] = "#FFFFFF"
    text_background_color: Optional[str] = "transparent"

    font_size: int = 60
    stroke_color: Optional[str] = "#000000"
    stroke_width: float = 1.5

    # --- Processing ---
    n_threads: Optional[int] = 2
    paragraph_number: Optional[int] = 1
|
||
|
||
|
||
|
||
|
||
|
||
class VideoClipParams(BaseModel):
    """NarratoAI data model holding the parameters of a video clipping task."""

    # --- Clip script and source video / subtitle paths ---
    video_clip_json: Optional[list] = Field(
        default=[],
        description="LLM 生成的视频剪辑脚本内容",
    )
    video_clip_json_path: Optional[str] = Field(
        default="",
        description="LLM 生成的视频剪辑脚本路径",
    )
    video_origin_path: Optional[str] = Field(
        default="",
        description="原视频路径",
    )
    video_origin_paths: Optional[List[str]] = Field(
        default=[],
        description="原视频路径列表",
    )
    original_subtitle_path: Optional[str] = Field(
        default="",
        description="原视频字幕路径",
    )
    original_subtitle_paths: Optional[List[str]] = Field(
        default=[],
        description="原视频字幕路径列表",
    )
    video_aspect: Optional[VideoAspect] = Field(
        default=VideoAspect.portrait.value,
        description="视频比例",
    )
    video_language: Optional[str] = Field(
        default="zh-CN",
        description="视频语言",
    )

    # Fields currently disabled in this model:
    # video_clip_duration: Optional[int] = 5      # duration of each video clip
    # video_count: Optional[int] = 1      # number of video clips
    # video_source: Optional[str] = "local"
    # video_concat_mode: Optional[VideoConcatMode] = VideoConcatMode.random.value

    # --- Narration voice and background music ---
    voice_name: Optional[str] = Field(
        default="zh-CN-YunjianNeural",
        description="语音名称",
    )
    voice_volume: Optional[float] = Field(
        default=AudioVolumeDefaults.VOICE_VOLUME,
        description="解说语音音量",
    )
    voice_rate: Optional[float] = Field(
        default=1.0,
        description="语速",
    )
    voice_pitch: Optional[float] = Field(
        default=1.0,
        description="语调",
    )
    tts_engine: Optional[str] = Field(
        default="",
        description="TTS 引擎",
    )
    bgm_name: Optional[str] = Field(
        default="random",
        description="背景音乐名称",
    )
    bgm_type: Optional[str] = Field(
        default="random",
        description="背景音乐类型",
    )
    bgm_file: Optional[str] = Field(
        default="",
        description="背景音乐文件",
    )

    # --- Subtitle switch and subtitle mask (landscape values, then portrait) ---
    subtitle_enabled: bool = True
    subtitle_mask_enabled: bool = False
    subtitle_mask_landscape_x_percent: float = 10.0
    subtitle_mask_landscape_y_percent: float = 78.0
    subtitle_mask_landscape_width_percent: float = 80.0
    subtitle_mask_landscape_height_percent: float = 14.0
    subtitle_mask_landscape_blur_radius: int = 18
    subtitle_mask_landscape_opacity_percent: int = 82
    subtitle_mask_portrait_x_percent: float = 8.0
    subtitle_mask_portrait_y_percent: float = 79.0
    subtitle_mask_portrait_width_percent: float = 84.0
    subtitle_mask_portrait_height_percent: float = 16.0
    subtitle_mask_portrait_blur_radius: int = 26
    subtitle_mask_portrait_opacity_percent: int = 84
    subtitle_position_landscape_y_percent: float = 85.0
    subtitle_position_portrait_y_percent: float = 82.0

    # --- Automatic subtitle transcription ---
    subtitle_auto_transcribe_enabled: bool = False
    subtitle_auto_transcribe_backend: str = "local"
    subtitle_auto_transcribe_api_url: str = ""
    subtitle_auto_transcribe_firered_api_url: str = ""
    subtitle_auto_transcribe_api_key: str = ""
    subtitle_auto_transcribe_hotword: str = ""
    subtitle_auto_transcribe_enable_spk: bool = False

    # --- Subtitle text styling ---
    font_name: str = "SimHei"  # Defaults to the SimHei (Heiti) typeface.
    font_size: int = 36
    text_fore_color: str = "white"  # Text foreground colour.
    text_back_color: Optional[str] = None  # Text background colour.
    stroke_color: str = "black"  # Stroke colour.
    stroke_width: float = 1.5  # Stroke width.
    subtitle_position: str = "bottom"  # top, bottom, center, custom
    custom_position: float = 70.0  # Custom position.

    # Thread count; helps speed up video processing.
    n_threads: Optional[int] = Field(
        default=16,
        description="线程数",
    )

    # --- Mix volumes and draft name ---
    tts_volume: Optional[float] = Field(
        default=AudioVolumeDefaults.TTS_VOLUME,
        description="解说语音音量(后处理)",
    )
    original_volume: Optional[float] = Field(
        default=AudioVolumeDefaults.ORIGINAL_VOLUME,
        description="视频原声音量",
    )
    bgm_volume: Optional[float] = Field(
        default=AudioVolumeDefaults.BGM_VOLUME,
        description="背景音乐音量",
    )
    draft_name: Optional[str] = Field(
        default="",
        description="剪映草稿名称",
    )
|
||
|
||
|
||
|
||
|
||
|
||
class SubtitlePosition(str, Enum):
    """Named subtitle placements; each member equals its lowercase string value."""

    TOP = "top"
    CENTER = "center"
    BOTTOM = "bottom"
|