# Mirror of https://github.com/linyqh/NarratoAI.git
# Synced 2025-12-11 18:42:49 +00:00
# 209 lines, 6.7 KiB, Python
import warnings
from enum import Enum
from typing import Any, List, Optional, Union

import pydantic
from pydantic import BaseModel, Field

# Silence one specific Pydantic UserWarning: the one whose message matches
# "Field name ... shadows an attribute in parent ...". Other warnings are
# left untouched.
warnings.filterwarnings(
    "ignore",
    category=UserWarning,
    message="Field name.*shadows an attribute in parent.*",
)


class AudioVolumeDefaults:
    """Default audio-volume constants, kept in one place for global consistency."""

    # Default volume for the narration voice / TTS track.
    VOICE_VOLUME = 1.0
    TTS_VOLUME = 1.0

    # Default volume for the video's original audio; raised above 1.0 to
    # balance it against the TTS track.
    ORIGINAL_VOLUME = 1.2

    # Default volume for background music.
    BGM_VOLUME = 0.3

    # Allowed volume range.
    MIN_VOLUME = 0.0
    MAX_VOLUME = 2.0  # lets the original audio exceed 1.0 to balance TTS

    # Smart volume adjustment.
    ENABLE_SMART_VOLUME = True  # whether smart volume analysis/adjustment is on


class VideoConcatMode(str, Enum):
    """String-valued enum of video concatenation modes.

    Members subclass ``str``, so they compare equal to their raw values
    (``VideoConcatMode.random == "random"``).
    """

    random = "random"
    sequential = "sequential"


class VideoAspect(str, Enum):
    """String-valued enum of supported aspect ratios, written "width:height"."""

    landscape = "16:9"
    landscape_2 = "4:3"
    portrait = "9:16"
    portrait_2 = "3:4"
    square = "1:1"

    def to_resolution(self):
        """Return the ``(width, height)`` pixel size for this aspect ratio.

        Every declared member has its own resolution; the short side is
        always 1080 pixels. Comparison is done against the raw string
        values, so a plain ``str`` such as ``"16:9"`` is also accepted as
        ``self``. Any unrecognised value falls back to the portrait size
        ``(1080, 1920)``.
        """
        if self == VideoAspect.landscape.value:
            return 1920, 1080
        if self == VideoAspect.landscape_2.value:
            # 4:3 previously fell through to the portrait fallback.
            return 1440, 1080
        if self == VideoAspect.portrait.value:
            return 1080, 1920
        if self == VideoAspect.portrait_2.value:
            # 3:4 previously fell through to the 9:16 fallback.
            return 1080, 1440
        if self == VideoAspect.square.value:
            return 1080, 1080
        # Unknown ratio: keep the historical portrait default.
        return 1080, 1920


# Config object handed to the pydantic dataclass decorator below; it only
# switches on ``arbitrary_types_allowed``.
class _Config:
    arbitrary_types_allowed = True


@pydantic.dataclasses.dataclass(config=_Config)
class MaterialInfo:
    """Pydantic dataclass describing one video material.

    All fields have defaults, so ``MaterialInfo()`` is valid.
    """

    provider: str = "pexels"  # name of the material provider
    url: str = ""  # location of the material
    duration: int = 0  # material duration (unit not specified in this file)


# VoiceNames = [
#     # zh-CN
#     "female-zh-CN-XiaoxiaoNeural",
#     "female-zh-CN-XiaoyiNeural",
#     "female-zh-CN-liaoning-XiaobeiNeural",
#     "female-zh-CN-shaanxi-XiaoniNeural",
#
#     "male-zh-CN-YunjianNeural",
#     "male-zh-CN-YunxiNeural",
#     "male-zh-CN-YunxiaNeural",
#     "male-zh-CN-YunyangNeural",
#
#     # "female-zh-HK-HiuGaaiNeural",
#     # "female-zh-HK-HiuMaanNeural",
#     # "male-zh-HK-WanLungNeural",
#     #
#     # "female-zh-TW-HsiaoChenNeural",
#     # "female-zh-TW-HsiaoYuNeural",
#     # "male-zh-TW-YunJheNeural",
#
#     # en-US
#     "female-en-US-AnaNeural",
#     "female-en-US-AriaNeural",
#     "female-en-US-AvaNeural",
#     "female-en-US-EmmaNeural",
#     "female-en-US-JennyNeural",
#     "female-en-US-MichelleNeural",
#
#     "male-en-US-AndrewNeural",
#     "male-en-US-BrianNeural",
#     "male-en-US-ChristopherNeural",
#     "male-en-US-EricNeural",
#     "male-en-US-GuyNeural",
#     "male-en-US-RogerNeural",
#     "male-en-US-SteffanNeural",
# ]


class VideoParams(BaseModel):
    """Parameters describing a video to generate.

    Only ``video_subject`` is required; every other field has a default.

    Example payload (legacy: keys such as ``bgm_name`` and ``text_color``
    are not fields of this model, whose corresponding fields are
    ``bgm_type``/``bgm_file`` and ``text_fore_color``)::

        {
            "video_subject": "",
            "video_aspect": "Landscape 16:9 (Xigua Video)",
            "voice_name": "Female - Xiaoxiao",
            "bgm_name": "random",
            "font_name": "STHeitiMedium Heiti-Medium",
            "text_color": "#FFFFFF",
            "font_size": 60,
            "stroke_color": "#000000",
            "stroke_width": 1.5
        }
    """

    video_subject: str
    video_script: str = ""  # script used to generate the video
    video_terms: Optional[Union[str, list]] = None  # keywords used to generate the video
    # NOTE: the two defaults below are the raw enum *values* (plain strings),
    # not enum members.
    video_aspect: Optional[VideoAspect] = VideoAspect.portrait.value
    video_concat_mode: Optional[VideoConcatMode] = VideoConcatMode.random.value
    video_clip_duration: Optional[int] = 5
    video_count: Optional[int] = 1

    video_source: Optional[str] = "pexels"
    video_materials: Optional[List[MaterialInfo]] = None  # materials used to generate the video

    video_language: Optional[str] = ""  # auto detect

    voice_name: Optional[str] = ""
    voice_volume: Optional[float] = AudioVolumeDefaults.VOICE_VOLUME
    voice_rate: Optional[float] = 1.0
    bgm_type: Optional[str] = "random"
    bgm_file: Optional[str] = ""
    bgm_volume: Optional[float] = AudioVolumeDefaults.BGM_VOLUME

    subtitle_enabled: Optional[bool] = True
    subtitle_position: Optional[str] = "bottom"  # top, bottom, center
    custom_position: float = 70.0
    font_name: Optional[str] = "STHeitiMedium.ttc"
    text_fore_color: Optional[str] = "#FFFFFF"
    text_background_color: Optional[str] = "transparent"

    font_size: int = 60
    stroke_color: Optional[str] = "#000000"
    stroke_width: float = 1.5
    n_threads: Optional[int] = 2
    paragraph_number: Optional[int] = 1


class VideoClipParams(BaseModel):
    """NarratoAI data model.

    Every field has a default, so the model can be built with no arguments.
    The ``description`` strings are part of the field metadata and are kept
    verbatim.
    """

    # Pydantic copies mutable defaults per instance, so the ``[]`` literal
    # below is not shared between models.
    video_clip_json: Optional[list] = Field(default=[], description="LLM 生成的视频剪辑脚本内容")
    video_clip_json_path: Optional[str] = Field(default="", description="LLM 生成的视频剪辑脚本路径")
    video_origin_path: Optional[str] = Field(default="", description="原视频路径")
    # NOTE: the default is the raw enum value (a plain string), not the member.
    video_aspect: Optional[VideoAspect] = Field(default=VideoAspect.portrait.value, description="视频比例")
    video_language: Optional[str] = Field(default="zh-CN", description="视频语言")

    # video_clip_duration: Optional[int] = 5      # clip duration
    # video_count: Optional[int] = 1      # number of clips
    # video_source: Optional[str] = "local"
    # video_concat_mode: Optional[VideoConcatMode] = VideoConcatMode.random.value

    voice_name: Optional[str] = Field(default="zh-CN-YunjianNeural", description="语音名称")
    voice_volume: Optional[float] = Field(default=AudioVolumeDefaults.VOICE_VOLUME, description="解说语音音量")
    voice_rate: Optional[float] = Field(default=1.0, description="语速")
    voice_pitch: Optional[float] = Field(default=1.0, description="语调")
    tts_engine: Optional[str] = Field(default="tencent", description="TTS 引擎")
    bgm_name: Optional[str] = Field(default="random", description="背景音乐名称")
    bgm_type: Optional[str] = Field(default="random", description="背景音乐类型")
    bgm_file: Optional[str] = Field(default="", description="背景音乐文件")

    subtitle_enabled: bool = True
    font_name: str = "SimHei"  # defaults to the SimHei (Hei) typeface
    font_size: int = 36
    text_fore_color: str = "white"  # text foreground color
    text_back_color: Optional[str] = None  # text background color
    stroke_color: str = "black"  # stroke (outline) color
    stroke_width: float = 1.5  # stroke (outline) width
    subtitle_position: str = "bottom"  # top, bottom, center, custom
    custom_position: float = 70.0  # custom position

    # Thread count; a higher value helps speed up video processing.
    n_threads: Optional[int] = Field(default=16, description="线程数")

    tts_volume: Optional[float] = Field(default=AudioVolumeDefaults.TTS_VOLUME, description="解说语音音量(后处理)")
    original_volume: Optional[float] = Field(default=AudioVolumeDefaults.ORIGINAL_VOLUME, description="视频原声音量")
    bgm_volume: Optional[float] = Field(default=AudioVolumeDefaults.BGM_VOLUME, description="背景音乐音量")


class SubtitlePosition(str, Enum):
|
||
TOP = "top"
|
||
CENTER = "center"
|
||
BOTTOM = "bottom"
|
||
|