NarratoAI/app/models/schema.py
linyq 401eb92fa3 feat(audio): improve audio merging, support OST settings, and raise timestamp precision
- Refactored the merge_audio_files function to add support for OST settings
- Added a time_to_seconds function that converts several timestamp formats
- Updated the audio_merger module to process audio according to the OST setting
- Updated start_subclip in the task module to pass the OST information through
- Adjusted the subtitle and video modules to fit the new audio handling
2024-11-27 23:26:43 +08:00
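The time_to_seconds helper mentioned in the commit message lives in the audio_merger module and does not appear in this file. As a rough illustration of what the commit describes, a minimal sketch of such a helper might look like the code below; only the function name comes from the commit message, while the body and the accepted formats are assumptions and may differ from the project's actual implementation. The schema.py source itself follows after the sketch.

def time_to_seconds(time_str: str) -> float:
    """Convert a timestamp string to seconds (illustrative sketch, not the project's code).

    Assumed formats: "HH:MM:SS,mmm" (SRT style), "MM:SS" and plain seconds such as "12.5".
    """
    # Normalize the SRT-style millisecond separator to a decimal point.
    normalized = time_str.strip().replace(",", ".")
    parts = normalized.split(":")
    if len(parts) == 3:        # "HH:MM:SS(.mmm)"
        hours, minutes, seconds = parts
    elif len(parts) == 2:      # "MM:SS(.mmm)"
        hours = "0"
        minutes, seconds = parts
    else:                      # plain seconds, e.g. "12.5"
        hours, minutes, seconds = "0", "0", parts[0]
    return int(hours) * 3600 + int(minutes) * 60 + float(seconds)

# e.g. time_to_seconds("00:01:23,450") == 83.45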


import warnings
from enum import Enum
from typing import Any, List, Optional

import pydantic
from pydantic import BaseModel, Field

# Ignore Pydantic's warning about field names shadowing attributes in a parent class
warnings.filterwarnings(
    "ignore",
    category=UserWarning,
    message="Field name.*shadows an attribute in parent.*",
)


class VideoConcatMode(str, Enum):
    random = "random"
    sequential = "sequential"


class VideoAspect(str, Enum):
    landscape = "16:9"
    portrait = "9:16"
    square = "1:1"

    def to_resolution(self):
        if self == VideoAspect.landscape.value:
            return 1920, 1080
        elif self == VideoAspect.portrait.value:
            return 1080, 1920
        elif self == VideoAspect.square.value:
            return 1080, 1080
        return 1080, 1920


class _Config:
    arbitrary_types_allowed = True


@pydantic.dataclasses.dataclass(config=_Config)
class MaterialInfo:
    provider: str = "pexels"
    url: str = ""
    duration: int = 0

# VoiceNames = [
# # zh-CN
# "female-zh-CN-XiaoxiaoNeural",
# "female-zh-CN-XiaoyiNeural",
# "female-zh-CN-liaoning-XiaobeiNeural",
# "female-zh-CN-shaanxi-XiaoniNeural",
#
# "male-zh-CN-YunjianNeural",
# "male-zh-CN-YunxiNeural",
# "male-zh-CN-YunxiaNeural",
# "male-zh-CN-YunyangNeural",
#
# # "female-zh-HK-HiuGaaiNeural",
# # "female-zh-HK-HiuMaanNeural",
# # "male-zh-HK-WanLungNeural",
# #
# # "female-zh-TW-HsiaoChenNeural",
# # "female-zh-TW-HsiaoYuNeural",
# # "male-zh-TW-YunJheNeural",
#
# # en-US
# "female-en-US-AnaNeural",
# "female-en-US-AriaNeural",
# "female-en-US-AvaNeural",
# "female-en-US-EmmaNeural",
# "female-en-US-JennyNeural",
# "female-en-US-MichelleNeural",
#
# "male-en-US-AndrewNeural",
# "male-en-US-BrianNeural",
# "male-en-US-ChristopherNeural",
# "male-en-US-EricNeural",
# "male-en-US-GuyNeural",
# "male-en-US-RogerNeural",
# "male-en-US-SteffanNeural",
# ]
class VideoParams(BaseModel):
    """
    {
      "video_subject": "",
      "video_aspect": "横屏 16:9西瓜视频",
      "voice_name": "女生-晓晓",
      "bgm_name": "random",
      "font_name": "STHeitiMedium 黑体-中",
      "text_color": "#FFFFFF",
      "font_size": 60,
      "stroke_color": "#000000",
      "stroke_width": 1.5
    }
    """

    video_subject: str
    video_script: str = ""  # script used to generate the video
    video_terms: Optional[str | list] = None  # search terms used to generate the video
    video_aspect: Optional[VideoAspect] = VideoAspect.portrait.value
    video_concat_mode: Optional[VideoConcatMode] = VideoConcatMode.random.value
    video_clip_duration: Optional[int] = 5
    video_count: Optional[int] = 1
    video_source: Optional[str] = "pexels"
    video_materials: Optional[List[MaterialInfo]] = None  # source materials used to generate the video
    video_language: Optional[str] = ""  # auto detect
    voice_name: Optional[str] = ""
    voice_volume: Optional[float] = 1.0
    voice_rate: Optional[float] = 1.0
    bgm_type: Optional[str] = "random"
    bgm_file: Optional[str] = ""
    bgm_volume: Optional[float] = 0.2
    subtitle_enabled: Optional[bool] = True
    subtitle_position: Optional[str] = "bottom"  # top, bottom, center
    custom_position: float = 70.0
    font_name: Optional[str] = "STHeitiMedium.ttc"
    text_fore_color: Optional[str] = "#FFFFFF"
    text_background_color: Optional[str] = "transparent"
    font_size: int = 60
    stroke_color: Optional[str] = "#000000"
    stroke_width: float = 1.5
    n_threads: Optional[int] = 2
    paragraph_number: Optional[int] = 1


class SubtitleRequest(BaseModel):
    video_script: str
    video_language: Optional[str] = ""
    voice_name: Optional[str] = "zh-CN-XiaoxiaoNeural-Female"
    voice_volume: Optional[float] = 1.0
    voice_rate: Optional[float] = 1.2
    bgm_type: Optional[str] = "random"
    bgm_file: Optional[str] = ""
    bgm_volume: Optional[float] = 0.2
    subtitle_position: Optional[str] = "bottom"
    font_name: Optional[str] = "STHeitiMedium.ttc"
    text_fore_color: Optional[str] = "#FFFFFF"
    text_background_color: Optional[str] = "transparent"
    font_size: int = 60
    stroke_color: Optional[str] = "#000000"
    stroke_width: float = 1.5
    video_source: Optional[str] = "local"
    subtitle_enabled: Optional[str] = "true"


class AudioRequest(BaseModel):
    video_script: str
    video_language: Optional[str] = ""
    voice_name: Optional[str] = "zh-CN-XiaoxiaoNeural-Female"
    voice_volume: Optional[float] = 1.0
    voice_rate: Optional[float] = 1.2
    bgm_type: Optional[str] = "random"
    bgm_file: Optional[str] = ""
    bgm_volume: Optional[float] = 0.2
    video_source: Optional[str] = "local"


class VideoScriptParams:
    """
    {
      "video_subject": "春天的花海",
      "video_language": "",
      "paragraph_number": 1
    }
    """

    video_subject: Optional[str] = "春天的花海"
    video_language: Optional[str] = ""
    paragraph_number: Optional[int] = 1


class VideoTermsParams:
    """
    {
      "video_subject": "",
      "video_script": "",
      "amount": 5
    }
    """

    video_subject: Optional[str] = "春天的花海"
    video_script: Optional[str] = (
        "春天的花海,如诗如画般展现在眼前。万物复苏的季节里,大地披上了一袭绚丽多彩的盛装。金黄的迎春、粉嫩的樱花、洁白的梨花、艳丽的郁金香……"
    )
    amount: Optional[int] = 5


class BaseResponse(BaseModel):
    status: int = 200
    message: Optional[str] = "success"
    data: Any = None


class TaskVideoRequest(VideoParams, BaseModel):
    pass


class TaskQueryRequest(BaseModel):
    pass


class VideoScriptRequest(VideoScriptParams, BaseModel):
    pass


class VideoTermsRequest(VideoTermsParams, BaseModel):
    pass

######################################################################################################
######################################################################################################
######################################################################################################
######################################################################################################
class TaskResponse(BaseResponse):
    class TaskResponseData(BaseModel):
        task_id: str

    data: TaskResponseData

    class Config:
        json_schema_extra = {
            "example": {
                "status": 200,
                "message": "success",
                "data": {"task_id": "6c85c8cc-a77a-42b9-bc30-947815aa0558"},
            },
        }


class TaskQueryResponse(BaseResponse):
    class Config:
        json_schema_extra = {
            "example": {
                "status": 200,
                "message": "success",
                "data": {
                    "state": 1,
                    "progress": 100,
                    "videos": [
                        "http://127.0.0.1:8080/tasks/6c85c8cc-a77a-42b9-bc30-947815aa0558/final-1.mp4"
                    ],
                    "combined_videos": [
                        "http://127.0.0.1:8080/tasks/6c85c8cc-a77a-42b9-bc30-947815aa0558/combined-1.mp4"
                    ],
                },
            },
        }


class TaskDeletionResponse(BaseResponse):
    class Config:
        json_schema_extra = {
            "example": {
                "status": 200,
                "message": "success",
                "data": {
                    "state": 1,
                    "progress": 100,
                    "videos": [
                        "http://127.0.0.1:8080/tasks/6c85c8cc-a77a-42b9-bc30-947815aa0558/final-1.mp4"
                    ],
                    "combined_videos": [
                        "http://127.0.0.1:8080/tasks/6c85c8cc-a77a-42b9-bc30-947815aa0558/combined-1.mp4"
                    ],
                },
            },
        }


class VideoScriptResponse(BaseResponse):
    class Config:
        json_schema_extra = {
            "example": {
                "status": 200,
                "message": "success",
                "data": {
                    "video_script": "春天的花海,是大自然的一幅美丽画卷。在这个季节里,大地复苏,万物生长,花朵争相绽放,形成了一片五彩斑斓的花海..."
                },
            },
        }


class VideoTermsResponse(BaseResponse):
    class Config:
        json_schema_extra = {
            "example": {
                "status": 200,
                "message": "success",
                "data": {"video_terms": ["sky", "tree"]},
            },
        }


class BgmRetrieveResponse(BaseResponse):
    class Config:
        json_schema_extra = {
            "example": {
                "status": 200,
                "message": "success",
                "data": {
                    "files": [
                        {
                            "name": "output013.mp3",
                            "size": 1891269,
                            "file": "/NarratoAI/resource/songs/output013.mp3",
                        }
                    ]
                },
            },
        }


class BgmUploadResponse(BaseResponse):
    class Config:
        json_schema_extra = {
            "example": {
                "status": 200,
                "message": "success",
                "data": {"file": "/NarratoAI/resource/songs/example.mp3"},
            },
        }


class VideoClipParams(BaseModel):
    """
    NarratoAI data model
    """

    video_clip_json: Optional[list] = Field(default=[], description="video clip script content generated by the LLM")
    video_clip_json_path: Optional[str] = Field(default="", description="path to the LLM-generated video clip script")
    video_origin_path: Optional[str] = Field(default="", description="path to the original video")
    video_aspect: Optional[VideoAspect] = Field(default=VideoAspect.portrait.value, description="video aspect ratio")
    video_language: Optional[str] = Field(default="zh-CN", description="video language")
    # video_clip_duration: Optional[int] = 5  # clip duration
    # video_count: Optional[int] = 1  # number of clips
    # video_source: Optional[str] = "local"
    # video_concat_mode: Optional[VideoConcatMode] = VideoConcatMode.random.value
    voice_name: Optional[str] = Field(default="zh-CN-YunjianNeural", description="voice name")
    voice_volume: Optional[float] = Field(default=1.0, description="voice volume")
    voice_rate: Optional[float] = Field(default=1.0, description="speech rate")
    voice_pitch: Optional[float] = Field(default=1.0, description="voice pitch")
    bgm_name: Optional[str] = Field(default="random", description="background music name")
    bgm_type: Optional[str] = Field(default="random", description="background music type")
    bgm_file: Optional[str] = Field(default="", description="background music file")
    bgm_volume: Optional[float] = Field(default=0.2, description="background music volume")
    subtitle_enabled: Optional[bool] = Field(default=True, description="whether subtitles are enabled")
    subtitle_position: Optional[str] = Field(default="bottom", description="subtitle position")  # top, bottom, center
    font_name: Optional[str] = Field(default="STHeitiMedium.ttc", description="font name")
    text_fore_color: Optional[str] = Field(default="#FFFFFF", description="text foreground color")
    text_background_color: Optional[str] = Field(default="transparent", description="text background color")
    font_size: int = Field(default=60, description="font size")
    stroke_color: Optional[str] = Field(default="#000000", description="text stroke color")
    stroke_width: float = Field(default=1.5, description="text stroke width")
    custom_position: float = Field(default=70.0, description="custom position")
    n_threads: Optional[int] = 8  # number of threads; helps speed up video processing
    tts_volume: float = 1.0  # TTS audio volume
    video_volume: float = 0.1  # original video audio volume


class VideoTranscriptionRequest(BaseModel):
    video_name: str
    language: str = "zh-CN"

    class Config:
        arbitrary_types_allowed = True


class VideoTranscriptionResponse(BaseModel):
    transcription: str
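
# Illustrative usage sketch appended below the original module (hedged example, not NarratoAI's code):
# it shows how the schemas above are typically consumed. The file path is a hypothetical value.
if __name__ == "__main__":
    _demo = VideoClipParams(
        video_origin_path="/path/to/source.mp4",
        voice_name="zh-CN-YunjianNeural",
        subtitle_enabled=True,
    )
    # Pydantic v2 serialization; on Pydantic v1 this would be _demo.dict().
    print(_demo.model_dump())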