Mirror of https://github.com/linyqh/NarratoAI.git (synced 2026-01-25 21:38:12 +00:00), commit cb02f2c897

@@ -48,6 +48,7 @@ def save_config():
        with open(config_file, "w", encoding="utf-8") as f:
            _cfg["app"] = app
            _cfg["azure"] = azure
            _cfg["soulvoice"] = soulvoice
            _cfg["ui"] = ui
            f.write(toml.dumps(_cfg))

@@ -57,6 +58,7 @@ app = _cfg.get("app", {})
whisper = _cfg.get("whisper", {})
proxy = _cfg.get("proxy", {})
azure = _cfg.get("azure", {})
soulvoice = _cfg.get("soulvoice", {})
ui = _cfg.get("ui", {})
frames = _cfg.get("frames", {})
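The hunk above persists three new config sections (azure, soulvoice, ui) next to the existing ones. A minimal, hypothetical sketch of that round trip (standalone illustration only; the real module reads config_file inside app/config):

import toml

# Hypothetical, self-contained example; keys mirror the sections added in this commit.
_cfg = {"app": {"project_version": "0.7.0"}}
_cfg["azure"] = {"speech_key": "", "speech_region": ""}
_cfg["soulvoice"] = {"api_key": "", "api_url": "https://tts.scsmtech.cn/tts"}
_cfg["ui"] = {"tts_engine": "edge_tts"}

with open("config.toml", "w", encoding="utf-8") as f:
    f.write(toml.dumps(_cfg))

with open("config.toml", "r", encoding="utf-8") as f:
    reloaded = toml.loads(f.read())
soulvoice = reloaded.get("soulvoice", {})  # mirrors soulvoice = _cfg.get("soulvoice", {}) above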
@@ -546,6 +546,359 @@ def try_fallback_encoding(
    return execute_simple_command(fallback_cmd, timestamp, "通用Fallback")


def _process_narration_only_segment(
        video_origin_path: str,
        script_item: Dict,
        tts_map: Dict,
        output_dir: str,
        encoder_config: Dict,
        hwaccel_args: List[str]
) -> Optional[str]:
    """
    处理OST=0的纯解说片段
    - 根据TTS音频时长动态裁剪
    - 移除原声,生成静音视频
    """
    _id = script_item["_id"]
    timestamp = script_item["timestamp"]

    # 获取对应的TTS结果
    tts_item = tts_map.get(_id)
    if not tts_item:
        logger.error(f"未找到片段 {_id} 的TTS结果")
        return None

    # 解析起始时间,使用TTS音频时长计算结束时间
    start_time, _ = parse_timestamp(timestamp)
    duration = tts_item["duration"]
    calculated_end_time = calculate_end_time(start_time, duration, extra_seconds=0)

    # 转换为FFmpeg兼容的时间格式
    ffmpeg_start_time = start_time.replace(',', '.')
    ffmpeg_end_time = calculated_end_time.replace(',', '.')

    # 生成输出文件名
    safe_start_time = start_time.replace(':', '-').replace(',', '-')
    safe_end_time = calculated_end_time.replace(':', '-').replace(',', '-')
    output_filename = f"ost0_vid_{safe_start_time}@{safe_end_time}.mp4"
    output_path = os.path.join(output_dir, output_filename)

    # 构建FFmpeg命令 - 移除音频
    cmd = _build_ffmpeg_command_with_audio_control(
        video_origin_path, output_path, ffmpeg_start_time, ffmpeg_end_time,
        encoder_config, hwaccel_args, remove_audio=True
    )

    # 执行命令
    success = execute_ffmpeg_with_fallback(
        cmd, timestamp, video_origin_path, output_path,
        ffmpeg_start_time, ffmpeg_end_time
    )

    return output_path if success else None
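parse_timestamp and calculate_end_time are existing helpers elsewhere in this module and are not shown in this diff. A rough sketch of the semantics assumed above (SRT-style "HH:MM:SS,mmm" strings; end time = start time + TTS duration):

from datetime import datetime, timedelta

def calculate_end_time_sketch(start_time: str, duration: float, extra_seconds: float = 0) -> str:
    # Assumed behaviour only: add the TTS duration (plus optional padding) to an SRT timestamp.
    start = datetime.strptime(start_time, "%H:%M:%S,%f")
    end = start + timedelta(seconds=duration + extra_seconds)
    return end.strftime("%H:%M:%S,") + f"{end.microsecond // 1000:03d}"

# e.g. calculate_end_time_sketch("00:00:01,500", 6.3) -> "00:00:07,800"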
def _process_original_audio_segment(
        video_origin_path: str,
        script_item: Dict,
        output_dir: str,
        encoder_config: Dict,
        hwaccel_args: List[str]
) -> Optional[str]:
    """
    处理OST=1的纯原声片段
    - 严格按照脚本timestamp精确裁剪
    - 保持原声不变
    """
    _id = script_item["_id"]
    timestamp = script_item["timestamp"]

    # 严格按照timestamp进行裁剪
    start_time, end_time = parse_timestamp(timestamp)

    # 转换为FFmpeg兼容的时间格式
    ffmpeg_start_time = start_time.replace(',', '.')
    ffmpeg_end_time = end_time.replace(',', '.')

    # 生成输出文件名
    safe_start_time = start_time.replace(':', '-').replace(',', '-')
    safe_end_time = end_time.replace(':', '-').replace(',', '-')
    output_filename = f"ost1_vid_{safe_start_time}@{safe_end_time}.mp4"
    output_path = os.path.join(output_dir, output_filename)

    # 构建FFmpeg命令 - 保持原声
    cmd = _build_ffmpeg_command_with_audio_control(
        video_origin_path, output_path, ffmpeg_start_time, ffmpeg_end_time,
        encoder_config, hwaccel_args, remove_audio=False
    )

    # 执行命令
    success = execute_ffmpeg_with_fallback(
        cmd, timestamp, video_origin_path, output_path,
        ffmpeg_start_time, ffmpeg_end_time
    )

    return output_path if success else None


def _process_mixed_segment(
        video_origin_path: str,
        script_item: Dict,
        tts_map: Dict,
        output_dir: str,
        encoder_config: Dict,
        hwaccel_args: List[str]
) -> Optional[str]:
    """
    处理OST=2的解说+原声混合片段
    - 根据TTS音频时长动态裁剪
    - 保持原声,确保视频时长等于TTS音频时长
    """
    _id = script_item["_id"]
    timestamp = script_item["timestamp"]

    # 获取对应的TTS结果
    tts_item = tts_map.get(_id)
    if not tts_item:
        logger.error(f"未找到片段 {_id} 的TTS结果")
        return None

    # 解析起始时间,使用TTS音频时长计算结束时间
    start_time, _ = parse_timestamp(timestamp)
    duration = tts_item["duration"]
    calculated_end_time = calculate_end_time(start_time, duration, extra_seconds=0)

    # 转换为FFmpeg兼容的时间格式
    ffmpeg_start_time = start_time.replace(',', '.')
    ffmpeg_end_time = calculated_end_time.replace(',', '.')

    # 生成输出文件名
    safe_start_time = start_time.replace(':', '-').replace(',', '-')
    safe_end_time = calculated_end_time.replace(':', '-').replace(',', '-')
    output_filename = f"ost2_vid_{safe_start_time}@{safe_end_time}.mp4"
    output_path = os.path.join(output_dir, output_filename)

    # 构建FFmpeg命令 - 保持原声
    cmd = _build_ffmpeg_command_with_audio_control(
        video_origin_path, output_path, ffmpeg_start_time, ffmpeg_end_time,
        encoder_config, hwaccel_args, remove_audio=False
    )

    # 执行命令
    success = execute_ffmpeg_with_fallback(
        cmd, timestamp, video_origin_path, output_path,
        ffmpeg_start_time, ffmpeg_end_time
    )

    return output_path if success else None


def _build_ffmpeg_command_with_audio_control(
        input_path: str,
        output_path: str,
        start_time: str,
        end_time: str,
        encoder_config: Dict[str, str],
        hwaccel_args: List[str] = None,
        remove_audio: bool = False
) -> List[str]:
    """
    构建支持音频控制的FFmpeg命令

    Args:
        input_path: 输入视频路径
        output_path: 输出视频路径
        start_time: 开始时间
        end_time: 结束时间
        encoder_config: 编码器配置
        hwaccel_args: 硬件加速参数
        remove_audio: 是否移除音频(OST=0时为True)

    Returns:
        List[str]: ffmpeg命令列表
    """
    cmd = ["ffmpeg", "-y"]

    # 硬件加速设置(参考原有逻辑)
    if encoder_config["video_codec"] == "h264_nvenc":
        # 对于NVENC,不使用硬件解码以避免滤镜链问题
        pass
    elif hwaccel_args:
        cmd.extend(hwaccel_args)

    # 输入文件
    cmd.extend(["-i", input_path])

    # 时间范围
    cmd.extend(["-ss", start_time, "-to", end_time])

    # 视频编码器设置
    cmd.extend(["-c:v", encoder_config["video_codec"]])

    # 音频处理
    if remove_audio:
        # OST=0: 移除音频
        cmd.extend(["-an"])  # -an 表示不包含音频流
        logger.debug("OST=0: 移除音频流")
    else:
        # OST=1,2: 保持原声
        cmd.extend(["-c:a", encoder_config["audio_codec"]])
        cmd.extend(["-ar", "44100", "-ac", "2"])
        logger.debug("OST=1/2: 保持原声")

    # 像素格式
    cmd.extend(["-pix_fmt", encoder_config["pixel_format"]])

    # 质量和预设参数(参考原有逻辑)
    if encoder_config["video_codec"] == "h264_nvenc":
        cmd.extend(["-preset", encoder_config["preset"]])
        cmd.extend(["-cq", encoder_config["quality_value"]])
        cmd.extend(["-profile:v", "main"])
    elif encoder_config["video_codec"] == "h264_amf":
        cmd.extend(["-quality", encoder_config["preset"]])
        cmd.extend(["-qp_i", encoder_config["quality_value"]])
    elif encoder_config["video_codec"] == "h264_qsv":
        cmd.extend(["-preset", encoder_config["preset"]])
        cmd.extend(["-global_quality", encoder_config["quality_value"]])
    elif encoder_config["video_codec"] == "h264_videotoolbox":
        cmd.extend(["-profile:v", "high"])
        cmd.extend(["-b:v", encoder_config["quality_value"]])
    else:
        # 软件编码器(libx264)
        cmd.extend(["-preset", encoder_config["preset"]])
        cmd.extend(["-crf", encoder_config["quality_value"]])

    # 优化参数
    cmd.extend(["-avoid_negative_ts", "make_zero"])
    cmd.extend(["-movflags", "+faststart"])

    # 输出文件
    cmd.append(output_path)

    return cmd
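For reference, the command built by the function above for an OST=0 segment on the software-encoder path looks roughly like the sketch below (file names and encoder_config values are made up for illustration; real values come from get_safe_encoder_config() and the script timestamps):

# Illustrative only.
cmd = _build_ffmpeg_command_with_audio_control(
    input_path="input.mp4",
    output_path="ost0_vid_00-00-01-500@00-00-07-800.mp4",
    start_time="00:00:01.500",
    end_time="00:00:07.800",
    encoder_config={"video_codec": "libx264", "audio_codec": "aac",
                    "pixel_format": "yuv420p", "preset": "medium", "quality_value": "23"},
    hwaccel_args=[],
    remove_audio=True,
)
# cmd -> ["ffmpeg", "-y", "-i", "input.mp4", "-ss", "00:00:01.500", "-to", "00:00:07.800",
#         "-c:v", "libx264", "-an", "-pix_fmt", "yuv420p", "-preset", "medium", "-crf", "23",
#         "-avoid_negative_ts", "make_zero", "-movflags", "+faststart",
#         "ost0_vid_00-00-01-500@00-00-07-800.mp4"]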
def clip_video_unified(
        video_origin_path: str,
        script_list: List[Dict],
        tts_results: List[Dict],
        output_dir: Optional[str] = None,
        task_id: Optional[str] = None
) -> Dict[str, str]:
    """
    基于OST类型的统一视频裁剪策略 - 消除双重裁剪问题

    Args:
        video_origin_path: 原始视频的路径
        script_list: 完整的脚本列表,包含所有片段信息
        tts_results: TTS结果列表,仅包含OST=0和OST=2的片段
        output_dir: 输出目录路径,默认为None时会自动生成
        task_id: 任务ID,用于生成唯一的输出目录,默认为None时会自动生成

    Returns:
        Dict[str, str]: 片段ID到裁剪后视频路径的映射
    """
    # 检查视频文件是否存在
    if not os.path.exists(video_origin_path):
        raise FileNotFoundError(f"视频文件不存在: {video_origin_path}")

    # 如果未提供task_id,则根据输入生成一个唯一ID
    if task_id is None:
        content_for_hash = f"{video_origin_path}_{json.dumps(script_list)}"
        task_id = hashlib.md5(content_for_hash.encode()).hexdigest()

    # 设置输出目录
    if output_dir is None:
        output_dir = os.path.join(
            os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
            "storage", "temp", "clip_video_unified", task_id
        )

    # 确保输出目录存在
    Path(output_dir).mkdir(parents=True, exist_ok=True)

    # 创建TTS结果的快速查找映射
    tts_map = {item['_id']: item for item in tts_results}

    # 获取硬件加速支持
    hwaccel_type = check_hardware_acceleration()
    hwaccel_args = []

    if hwaccel_type:
        hwaccel_args = ffmpeg_utils.get_ffmpeg_hwaccel_args()
        hwaccel_info = ffmpeg_utils.get_ffmpeg_hwaccel_info()
        logger.info(f"🚀 使用硬件加速: {hwaccel_type} ({hwaccel_info.get('message', '')})")
    else:
        logger.info("🔧 使用软件编码")

    # 获取编码器配置
    encoder_config = get_safe_encoder_config(hwaccel_type)
    logger.debug(f"编码器配置: {encoder_config}")

    # 统计信息
    total_clips = len(script_list)
    result = {}
    failed_clips = []
    success_count = 0

    logger.info(f"📹 开始统一视频裁剪,总共{total_clips}个片段")

    for i, script_item in enumerate(script_list, 1):
        _id = script_item.get("_id")
        ost = script_item.get("OST", 0)
        timestamp = script_item["timestamp"]

        logger.info(f"📹 [{i}/{total_clips}] 处理片段 ID:{_id}, OST:{ost}, 时间戳:{timestamp}")

        try:
            if ost == 0:  # 纯解说片段
                output_path = _process_narration_only_segment(
                    video_origin_path, script_item, tts_map, output_dir,
                    encoder_config, hwaccel_args
                )
            elif ost == 1:  # 纯原声片段
                output_path = _process_original_audio_segment(
                    video_origin_path, script_item, output_dir,
                    encoder_config, hwaccel_args
                )
            elif ost == 2:  # 解说+原声混合片段
                output_path = _process_mixed_segment(
                    video_origin_path, script_item, tts_map, output_dir,
                    encoder_config, hwaccel_args
                )
            else:
                logger.warning(f"未知的OST类型: {ost},跳过片段 {_id}")
                continue

            if output_path and os.path.exists(output_path) and os.path.getsize(output_path) > 0:
                result[_id] = output_path
                success_count += 1
                logger.info(f"✅ [{i}/{total_clips}] 片段处理成功: OST={ost}, ID={_id}")
            else:
                failed_clips.append(f"ID:{_id}, OST:{ost}")
                logger.error(f"❌ [{i}/{total_clips}] 片段处理失败: OST={ost}, ID={_id}")

        except Exception as e:
            failed_clips.append(f"ID:{_id}, OST:{ost}")
            logger.error(f"❌ [{i}/{total_clips}] 片段处理异常: OST={ost}, ID={_id}, 错误: {str(e)}")

    # 最终统计
    logger.info(f"📊 统一视频裁剪完成: 成功 {success_count}/{total_clips}, 失败 {len(failed_clips)}")

    # 检查是否有失败的片段
    if failed_clips:
        logger.warning(f"⚠️ 以下片段处理失败: {failed_clips}")
        if len(failed_clips) == total_clips:
            raise RuntimeError("所有视频片段处理都失败了,请检查视频文件和ffmpeg配置")
        elif len(failed_clips) > total_clips / 2:
            logger.warning(f"⚠️ 超过一半的片段处理失败 ({len(failed_clips)}/{total_clips}),请检查硬件加速配置")

    if success_count > 0:
        logger.info(f"🎉 统一视频裁剪任务完成! 输出目录: {output_dir}")

    return result
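A minimal, hypothetical call illustrating the data shapes clip_video_unified expects (field names taken from the code above; all values are made up):

script_list = [
    {"_id": 1, "timestamp": "00:00:00,600-00:00:07,559", "picture": "画面描述1", "narration": "解说词1", "OST": 0},
    {"_id": 2, "timestamp": "00:00:07,560-00:00:12,000", "picture": "画面描述2", "narration": "", "OST": 1},
    {"_id": 3, "timestamp": "00:00:12,001-00:00:20,000", "picture": "画面描述3", "narration": "解说词3", "OST": 2},
]
# tts_results only covers OST=0 and OST=2 segments; "duration" drives the dynamic cut length.
tts_results = [
    {"_id": 1, "audio_file": "1.mp3", "subtitle_file": "1.srt", "duration": 6.9},
    {"_id": 3, "audio_file": "3.mp3", "subtitle_file": "3.srt", "duration": 8.2},
]
result = clip_video_unified("origin.mp4", script_list, tts_results)
# result maps segment id -> clipped file, e.g. {1: ".../ost0_vid_... .mp4", 2: ".../ost1_vid_... .mp4", 3: ".../ost2_vid_... .mp4"}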
def clip_video(
        video_origin_path: str,
        tts_result: List[Dict],
@@ -613,6 +966,49 @@ def clip_video(

        # 根据持续时间计算真正的结束时间(加上1秒余量)
        duration = item["duration"]

        # 时长合理性检查和修正
        if duration <= 0 or duration > 300:  # 超过5分钟认为不合理
            logger.warning(f"检测到异常时长 {duration}秒,片段: {timestamp}")

            # 尝试从时间戳计算实际时长
            try:
                start_time_str, end_time_str = timestamp.split('-')

                # 解析开始时间
                if ',' in start_time_str:
                    time_part, ms_part = start_time_str.split(',')
                    h1, m1, s1 = map(int, time_part.split(':'))
                    ms1 = int(ms_part)
                else:
                    h1, m1, s1 = map(int, start_time_str.split(':'))
                    ms1 = 0

                # 解析结束时间
                if ',' in end_time_str:
                    time_part, ms_part = end_time_str.split(',')
                    h2, m2, s2 = map(int, time_part.split(':'))
                    ms2 = int(ms_part)
                else:
                    h2, m2, s2 = map(int, end_time_str.split(':'))
                    ms2 = 0

                # 计算实际时长
                start_total_ms = (h1 * 3600 + m1 * 60 + s1) * 1000 + ms1
                end_total_ms = (h2 * 3600 + m2 * 60 + s2) * 1000 + ms2
                actual_duration = (end_total_ms - start_total_ms) / 1000.0

                if actual_duration > 0 and actual_duration <= 300:
                    duration = actual_duration
                    logger.info(f"使用时间戳计算的实际时长: {duration:.3f}秒")
                else:
                    duration = 5.0  # 默认5秒
                    logger.warning(f"时间戳计算也异常,使用默认时长: {duration}秒")

            except Exception as e:
                duration = 5.0  # 默认5秒
                logger.warning(f"时长修正失败,使用默认时长: {duration}秒, 错误: {str(e)}")

        calculated_end_time = calculate_end_time(start_time, duration)

        # 转换为FFmpeg兼容的时间格式(逗号替换为点)
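A quick worked example of the millisecond arithmetic above, for the timestamp "00:00:01,500-00:00:07,800":

start_total_ms = (0 * 3600 + 0 * 60 + 1) * 1000 + 500       # 1500
end_total_ms = (0 * 3600 + 0 * 60 + 7) * 1000 + 800         # 7800
actual_duration = (end_total_ms - start_total_ms) / 1000.0  # 6.3 seconds, inside the 0-300 s sanity range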
@@ -57,14 +57,33 @@ class BaseLLMProvider(ABC):
        """验证配置参数"""
        if not self.api_key:
            raise ConfigurationError("API密钥不能为空", "api_key")

        if not self.model_name:
            raise ConfigurationError("模型名称不能为空", "model_name")

        if self.model_name not in self.supported_models:
            from .exceptions import ModelNotSupportedError
            raise ModelNotSupportedError(self.model_name, self.provider_name)

        # 检查模型支持情况
        self._validate_model_support()

    def _validate_model_support(self):
        """验证模型支持情况"""
        from app.config import config
        from .exceptions import ModelNotSupportedError
        from loguru import logger

        # 获取模型验证模式配置
        strict_model_validation = config.app.get('strict_model_validation', True)

        if self.model_name not in self.supported_models:
            if strict_model_validation:
                # 严格模式:抛出异常
                raise ModelNotSupportedError(self.model_name, self.provider_name)
            else:
                # 宽松模式:仅记录警告
                logger.warning(
                    f"模型 {self.model_name} 未在供应商 {self.provider_name} 的预定义支持列表中,"
                    f"但已启用宽松验证模式。支持的模型列表: {self.supported_models}"
                )
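The lenient mode is controlled by the strict_model_validation key that the config.toml hunk later in this diff adds under [app]; a caller can inspect the effective mode the same way _validate_model_support does:

from app.config import config

# Defaults to strict validation when the key is absent, matching the lookup above.
strict_model_validation = config.app.get('strict_model_validation', True)
print("strict" if strict_model_validation else "lenient (unlisted models only log a warning)")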
    def _initialize(self):
        """初始化提供商特定设置,子类可重写"""
        pass

@@ -77,11 +96,15 @@ class BaseLLMProvider(ABC):
    def _handle_api_error(self, status_code: int, response_text: str) -> LLMServiceError:
        """处理API错误,返回适当的异常"""
        from .exceptions import APICallError, RateLimitError, AuthenticationError

        if status_code == 401:
            return AuthenticationError()
        elif status_code == 429:
            return RateLimitError()
        elif status_code in [502, 503, 504]:
            return APICallError(f"服务器错误 HTTP {status_code}", status_code, response_text)
        elif status_code == 524:
            return APICallError(f"服务器处理超时 HTTP {status_code}", status_code, response_text)
        else:
            return APICallError(f"HTTP {status_code}", status_code, response_text)

@@ -213,7 +213,8 @@ class LLMConfigValidator:
                "确保所有API密钥都已正确配置",
                "建议为每个提供商配置base_url以提高稳定性",
                "定期检查模型名称是否为最新版本",
                "建议配置多个提供商作为备用方案"
                "建议配置多个提供商作为备用方案",
                "如果使用新发布的模型遇到MODEL_NOT_SUPPORTED错误,可以设置 strict_model_validation = false 启用宽松验证模式"
            ]
        }

@@ -252,8 +253,8 @@ class LLMConfigValidator:
        """获取示例模型名称"""
        examples = {
            "gemini": {
                "vision": ["gemini-2.0-flash-lite", "gemini-2.0-flash"],
                "text": ["gemini-2.0-flash", "gemini-1.5-pro"]
                "vision": ["gemini-2.5-flash", "gemini-2.0-flash-lite", "gemini-2.0-flash"],
                "text": ["gemini-2.5-flash", "gemini-2.0-flash", "gemini-1.5-pro"]
            },
            "openai": {
                "vision": [],

@@ -27,6 +27,7 @@ class GeminiOpenAIVisionProvider(VisionModelProvider):
    @property
    def supported_models(self) -> List[str]:
        return [
            "gemini-2.5-flash",
            "gemini-2.0-flash-lite",
            "gemini-2.0-flash",
            "gemini-1.5-pro",
@@ -137,6 +138,7 @@ class GeminiOpenAITextProvider(TextModelProvider):
    @property
    def supported_models(self) -> List[str]:
        return [
            "gemini-2.5-flash",
            "gemini-2.0-flash-lite",
            "gemini-2.0-flash",
            "gemini-1.5-pro",

@@ -27,6 +27,7 @@ class GeminiVisionProvider(VisionModelProvider):
    @property
    def supported_models(self) -> List[str]:
        return [
            "gemini-2.5-flash",
            "gemini-2.0-flash-lite",
            "gemini-2.0-flash",
            "gemini-1.5-pro",
@@ -136,25 +137,72 @@ class GeminiVisionProvider(VisionModelProvider):
        return base64.b64encode(img_bytes).decode('utf-8')

    async def _make_api_call(self, payload: Dict[str, Any]) -> Dict[str, Any]:
        """执行原生Gemini API调用"""
        """执行原生Gemini API调用,包含重试机制"""
        from app.config import config

        url = f"{self.base_url}/models/{self.model_name}:generateContent?key={self.api_key}"

        response = await asyncio.to_thread(
            requests.post,
            url,
            json=payload,
            headers={
                "Content-Type": "application/json",
                "User-Agent": "NarratoAI/1.0"
            },
            timeout=120
        )

        if response.status_code != 200:
            error = self._handle_api_error(response.status_code, response.text)
            raise error

        return response.json()

        max_retries = config.app.get('llm_max_retries', 3)
        base_timeout = config.app.get('llm_vision_timeout', 120)

        for attempt in range(max_retries):
            try:
                # 根据尝试次数调整超时时间
                timeout = base_timeout * (attempt + 1)
                logger.debug(f"Gemini API调用尝试 {attempt + 1}/{max_retries},超时设置: {timeout}秒")

                response = await asyncio.to_thread(
                    requests.post,
                    url,
                    json=payload,
                    headers={
                        "Content-Type": "application/json",
                        "User-Agent": "NarratoAI/1.0"
                    },
                    timeout=timeout
                )

                if response.status_code == 200:
                    return response.json()

                # 处理特定的错误状态码
                if response.status_code == 429:
                    # 速率限制,等待后重试
                    wait_time = 30 * (attempt + 1)
                    logger.warning(f"Gemini API速率限制,等待 {wait_time} 秒后重试")
                    await asyncio.sleep(wait_time)
                    continue
                elif response.status_code in [502, 503, 504, 524]:
                    # 服务器错误或超时,可以重试
                    if attempt < max_retries - 1:
                        wait_time = 10 * (attempt + 1)
                        logger.warning(f"Gemini API服务器错误 {response.status_code},等待 {wait_time} 秒后重试")
                        await asyncio.sleep(wait_time)
                        continue

                # 其他错误,直接抛出
                error = self._handle_api_error(response.status_code, response.text)
                raise error

            except requests.exceptions.Timeout:
                if attempt < max_retries - 1:
                    wait_time = 15 * (attempt + 1)
                    logger.warning(f"Gemini API请求超时,等待 {wait_time} 秒后重试")
                    await asyncio.sleep(wait_time)
                    continue
                else:
                    raise APICallError("Gemini API请求超时,已达到最大重试次数")
            except requests.exceptions.RequestException as e:
                if attempt < max_retries - 1:
                    wait_time = 10 * (attempt + 1)
                    logger.warning(f"Gemini API网络错误: {str(e)},等待 {wait_time} 秒后重试")
                    await asyncio.sleep(wait_time)
                    continue
                else:
                    raise APICallError(f"Gemini API网络错误: {str(e)}")

        # 如果所有重试都失败了
        raise APICallError("Gemini API调用失败,已达到最大重试次数")

    def _parse_vision_response(self, response_data: Dict[str, Any]) -> str:
        """解析视觉分析响应"""
@@ -192,6 +240,7 @@ class GeminiTextProvider(TextModelProvider):
    @property
    def supported_models(self) -> List[str]:
        return [
            "gemini-2.5-flash",
            "gemini-2.0-flash-lite",
            "gemini-2.0-flash",
            "gemini-1.5-pro",
@@ -278,25 +327,72 @@ class GeminiTextProvider(TextModelProvider):
        return self._parse_text_response(response_data)

    async def _make_api_call(self, payload: Dict[str, Any]) -> Dict[str, Any]:
        """执行原生Gemini API调用"""
        """执行原生Gemini API调用,包含重试机制"""
        from app.config import config

        url = f"{self.base_url}/models/{self.model_name}:generateContent?key={self.api_key}"

        response = await asyncio.to_thread(
            requests.post,
            url,
            json=payload,
            headers={
                "Content-Type": "application/json",
                "User-Agent": "NarratoAI/1.0"
            },
            timeout=120
        )

        if response.status_code != 200:
            error = self._handle_api_error(response.status_code, response.text)
            raise error

        return response.json()

        max_retries = config.app.get('llm_max_retries', 3)
        base_timeout = config.app.get('llm_text_timeout', 180)  # 文本生成任务使用更长的基础超时时间

        for attempt in range(max_retries):
            try:
                # 根据尝试次数调整超时时间
                timeout = base_timeout * (attempt + 1)
                logger.debug(f"Gemini文本API调用尝试 {attempt + 1}/{max_retries},超时设置: {timeout}秒")

                response = await asyncio.to_thread(
                    requests.post,
                    url,
                    json=payload,
                    headers={
                        "Content-Type": "application/json",
                        "User-Agent": "NarratoAI/1.0"
                    },
                    timeout=timeout
                )

                if response.status_code == 200:
                    return response.json()

                # 处理特定的错误状态码
                if response.status_code == 429:
                    # 速率限制,等待后重试
                    wait_time = 30 * (attempt + 1)
                    logger.warning(f"Gemini API速率限制,等待 {wait_time} 秒后重试")
                    await asyncio.sleep(wait_time)
                    continue
                elif response.status_code in [502, 503, 504, 524]:
                    # 服务器错误或超时,可以重试
                    if attempt < max_retries - 1:
                        wait_time = 15 * (attempt + 1)
                        logger.warning(f"Gemini API服务器错误 {response.status_code},等待 {wait_time} 秒后重试")
                        await asyncio.sleep(wait_time)
                        continue

                # 其他错误,直接抛出
                error = self._handle_api_error(response.status_code, response.text)
                raise error

            except requests.exceptions.Timeout:
                if attempt < max_retries - 1:
                    wait_time = 20 * (attempt + 1)
                    logger.warning(f"Gemini文本API请求超时,等待 {wait_time} 秒后重试")
                    await asyncio.sleep(wait_time)
                    continue
                else:
                    raise APICallError("Gemini文本API请求超时,已达到最大重试次数")
            except requests.exceptions.RequestException as e:
                if attempt < max_retries - 1:
                    wait_time = 15 * (attempt + 1)
                    logger.warning(f"Gemini文本API网络错误: {str(e)},等待 {wait_time} 秒后重试")
                    await asyncio.sleep(wait_time)
                    continue
                else:
                    raise APICallError(f"Gemini文本API网络错误: {str(e)}")

        # 如果所有重试都失败了
        raise APICallError("Gemini文本API调用失败,已达到最大重试次数")

    def _parse_text_response(self, response_data: Dict[str, Any]) -> str:
        """解析文本生成响应"""
@@ -15,13 +15,19 @@ from app.services import state as sm
from app.utils import utils


def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: dict):
def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: dict = None):
    """
    后台任务(自动剪辑视频进行剪辑)
    后台任务(统一视频裁剪处理)- 优化版本

    实施基于OST类型的统一视频裁剪策略,消除双重裁剪问题:
    - OST=0: 根据TTS音频时长动态裁剪,移除原声
    - OST=1: 严格按照脚本timestamp精确裁剪,保持原声
    - OST=2: 根据TTS音频时长动态裁剪,保持原声

    Args:
        task_id: 任务ID
        params: 视频参数
        subclip_path_videos: 视频片段路径
        subclip_path_videos: 视频片段路径(可选,仅作为备用方案)
    """
    global merged_audio_path, merged_subtitle_path

@@ -94,17 +100,26 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di
    # sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=40)

    """
    3. 裁剪视频 - 将超出音频长度的视频进行裁剪
    3. 统一视频裁剪 - 基于OST类型的差异化裁剪策略
    """
    logger.info("\n\n## 3. 裁剪视频")
    video_clip_result = clip_video.clip_video(params.video_origin_path, tts_results)
    # 更新 list_script 中的时间戳
    logger.info("\n\n## 3. 统一视频裁剪(基于OST类型)")

    # 使用新的统一裁剪策略
    video_clip_result = clip_video.clip_video_unified(
        video_origin_path=params.video_origin_path,
        script_list=list_script,
        tts_results=tts_results
    )

    # 更新 list_script 中的时间戳和路径信息
    tts_clip_result = {tts_result['_id']: tts_result['audio_file'] for tts_result in tts_results}
    subclip_clip_result = {
        tts_result['_id']: tts_result['subtitle_file'] for tts_result in tts_results
    }
    new_script_list = update_script.update_script_timestamps(list_script, video_clip_result, tts_clip_result, subclip_clip_result)

    logger.info(f"统一裁剪完成,处理了 {len(video_clip_result)} 个视频片段")

    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=60)

    """
@@ -139,8 +154,27 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di

    combined_video_path = path.join(utils.task_dir(task_id), f"merger.mp4")
    logger.info(f"\n\n## 5. 合并视频: => {combined_video_path}")
    # 如果 new_script_list 中没有 video,则使用 subclip_path_videos 中的视频
    video_clips = [new_script['video'] if new_script.get('video') else subclip_path_videos.get(new_script.get('_id', '')) for new_script in new_script_list]

    # 使用统一裁剪后的视频片段
    video_clips = []
    for new_script in new_script_list:
        video_path = new_script.get('video')
        if video_path and os.path.exists(video_path):
            video_clips.append(video_path)
        else:
            logger.warning(f"片段 {new_script.get('_id')} 的视频文件不存在或未生成: {video_path}")
            # 如果统一裁剪失败,尝试使用备用方案(如果提供了subclip_path_videos)
            if subclip_path_videos and new_script.get('_id') in subclip_path_videos:
                backup_video = subclip_path_videos[new_script.get('_id')]
                if os.path.exists(backup_video):
                    video_clips.append(backup_video)
                    logger.info(f"使用备用视频: {backup_video}")
                else:
                    logger.error(f"备用视频也不存在: {backup_video}")
            else:
                logger.error(f"无法找到片段 {new_script.get('_id')} 的视频文件")

    logger.info(f"准备合并 {len(video_clips)} 个视频片段")

    merger_video.combine_clip_videos(
        output_video_path=combined_video_path,
@@ -208,6 +242,199 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di
    return kwargs


def start_subclip_unified(task_id: str, params: VideoClipParams):
    """
    统一视频裁剪处理函数 - 完全基于OST类型的新实现

    这是优化后的版本,完全移除了对预裁剪视频的依赖,
    实现真正的统一裁剪策略。

    Args:
        task_id: 任务ID
        params: 视频参数
    """
    global merged_audio_path, merged_subtitle_path

    logger.info(f"\n\n## 开始统一视频处理任务: {task_id}")
    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=0)

    """
    1. 加载剪辑脚本
    """
    logger.info("\n\n## 1. 加载视频脚本")
    video_script_path = path.join(params.video_clip_json_path)

    if path.exists(video_script_path):
        try:
            with open(video_script_path, "r", encoding="utf-8") as f:
                list_script = json.load(f)
                video_list = [i['narration'] for i in list_script]
                video_ost = [i['OST'] for i in list_script]
                time_list = [i['timestamp'] for i in list_script]

                video_script = " ".join(video_list)
                logger.debug(f"解说完整脚本: \n{video_script}")
                logger.debug(f"解说 OST 列表: \n{video_ost}")
                logger.debug(f"解说时间戳列表: \n{time_list}")
        except Exception as e:
            logger.error(f"无法读取视频json脚本,请检查脚本格式是否正确")
            raise ValueError("无法读取视频json脚本,请检查脚本格式是否正确")
    else:
        logger.error(f"video_script_path: {video_script_path}")
        raise ValueError("解说脚本不存在!请检查配置是否正确。")

    """
    2. 使用 TTS 生成音频素材
    """
    logger.info("\n\n## 2. 根据OST设置生成音频列表")
    # 只为OST=0 or 2的判断生成音频, OST=0 仅保留解说 OST=2 保留解说和原声
    tts_segments = [
        segment for segment in list_script
        if segment['OST'] in [0, 2]
    ]
    logger.debug(f"需要生成TTS的片段数: {len(tts_segments)}")

    tts_results = voice.tts_multiple(
        task_id=task_id,
        list_script=tts_segments,  # 只传入需要TTS的片段
        voice_name=params.voice_name,
        voice_rate=params.voice_rate,
        voice_pitch=params.voice_pitch,
    )

    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=20)

    """
    3. 统一视频裁剪 - 基于OST类型的差异化裁剪策略
    """
    logger.info("\n\n## 3. 统一视频裁剪(基于OST类型)")

    # 使用新的统一裁剪策略
    video_clip_result = clip_video.clip_video_unified(
        video_origin_path=params.video_origin_path,
        script_list=list_script,
        tts_results=tts_results
    )

    # 更新 list_script 中的时间戳和路径信息
    tts_clip_result = {tts_result['_id']: tts_result['audio_file'] for tts_result in tts_results}
    subclip_clip_result = {
        tts_result['_id']: tts_result['subtitle_file'] for tts_result in tts_results
    }
    new_script_list = update_script.update_script_timestamps(list_script, video_clip_result, tts_clip_result, subclip_clip_result)

    logger.info(f"统一裁剪完成,处理了 {len(video_clip_result)} 个视频片段")

    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=60)

    """
    4. 合并音频和字幕
    """
    logger.info("\n\n## 4. 合并音频和字幕")
    total_duration = sum([script["duration"] for script in new_script_list])
    if tts_segments:
        try:
            # 合并音频文件
            merged_audio_path = audio_merger.merge_audio_files(
                task_id=task_id,
                total_duration=total_duration,
                list_script=new_script_list
            )
            logger.info(f"音频文件合并成功->{merged_audio_path}")
            # 合并字幕文件
            merged_subtitle_path = subtitle_merger.merge_subtitle_files(new_script_list)
            logger.info(f"字幕文件合并成功->{merged_subtitle_path}")
        except Exception as e:
            logger.error(f"合并音频文件失败: {str(e)}")
    else:
        logger.warning("没有需要合并的音频/字幕")
        merged_audio_path = ""
        merged_subtitle_path = ""

    """
    5. 合并视频
    """
    final_video_paths = []
    combined_video_paths = []

    combined_video_path = path.join(utils.task_dir(task_id), f"merger.mp4")
    logger.info(f"\n\n## 5. 合并视频: => {combined_video_path}")

    # 使用统一裁剪后的视频片段
    video_clips = []
    for new_script in new_script_list:
        video_path = new_script.get('video')
        if video_path and os.path.exists(video_path):
            video_clips.append(video_path)
        else:
            logger.error(f"片段 {new_script.get('_id')} 的视频文件不存在: {video_path}")

    logger.info(f"准备合并 {len(video_clips)} 个视频片段")

    merger_video.combine_clip_videos(
        output_video_path=combined_video_path,
        video_paths=video_clips,
        video_ost_list=video_ost,
        video_aspect=params.video_aspect,
        threads=params.n_threads
    )
    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=80)

    """
    6. 合并字幕/BGM/配音/视频
    """
    output_video_path = path.join(utils.task_dir(task_id), f"combined.mp4")
    logger.info(f"\n\n## 6. 最后一步: 合并字幕/BGM/配音/视频 -> {output_video_path}")

    bgm_path = utils.get_bgm_file()

    # 获取优化的音量配置
    optimized_volumes = get_recommended_volumes_for_content('mixed')

    # 应用用户设置和优化建议的组合
    final_tts_volume = params.tts_volume if hasattr(params, 'tts_volume') and params.tts_volume != 1.0 else optimized_volumes['tts_volume']
    final_original_volume = params.original_volume if hasattr(params, 'original_volume') and params.original_volume != 0.7 else optimized_volumes['original_volume']
    final_bgm_volume = params.bgm_volume if hasattr(params, 'bgm_volume') and params.bgm_volume != 0.3 else optimized_volumes['bgm_volume']

    logger.info(f"音量配置 - TTS: {final_tts_volume}, 原声: {final_original_volume}, BGM: {final_bgm_volume}")

    # 调用示例
    options = {
        'voice_volume': final_tts_volume,
        'bgm_volume': final_bgm_volume,
        'original_audio_volume': final_original_volume,
        'keep_original_audio': True,
        'subtitle_enabled': params.subtitle_enabled,
        'subtitle_font': params.font_name,
        'subtitle_font_size': params.font_size,
        'subtitle_color': params.text_fore_color,
        'subtitle_bg_color': None,
        'subtitle_position': params.subtitle_position,
        'custom_position': params.custom_position,
        'threads': params.n_threads
    }
    generate_video.merge_materials(
        video_path=combined_video_path,
        audio_path=merged_audio_path,
        subtitle_path=merged_subtitle_path,
        bgm_path=bgm_path,
        output_path=output_video_path,
        options=options
    )

    final_video_paths.append(output_video_path)
    combined_video_paths.append(combined_video_path)

    logger.success(f"统一处理任务 {task_id} 已完成, 生成 {len(final_video_paths)} 个视频.")

    kwargs = {
        "videos": final_video_paths,
        "combined_videos": combined_video_paths
    }
    sm.state.update_task(task_id, state=const.TASK_STATE_COMPLETE, progress=100, **kwargs)
    return kwargs


def validate_params(video_path, audio_path, output_file, params):
    """
    验证输入参数
@@ -4,19 +4,42 @@ import json
import traceback
import edge_tts
import asyncio
import requests
from loguru import logger
from typing import List, Union
from typing import List, Union, Tuple
from datetime import datetime
from xml.sax.saxutils import unescape
from edge_tts import submaker, SubMaker
from edge_tts.submaker import mktimestamp
# from edge_tts.submaker import mktimestamp # 函数可能不存在,我们自己实现
from moviepy.video.tools import subtitles
try:
    from moviepy import AudioFileClip
    MOVIEPY_AVAILABLE = True
except ImportError:
    MOVIEPY_AVAILABLE = False
    logger.warning("moviepy 未安装,将使用估算方法计算音频时长")
import time

from app.config import config
from app.utils import utils


def mktimestamp(time_seconds: float) -> str:
    """
    将秒数转换为 SRT 时间戳格式

    Args:
        time_seconds: 时间(秒)

    Returns:
        str: SRT 格式的时间戳,如 "00:01:23.456"
    """
    hours = int(time_seconds // 3600)
    minutes = int((time_seconds % 3600) // 60)
    seconds = time_seconds % 60
    return f"{hours:02d}:{minutes:02d}:{seconds:06.3f}"
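Example output of the replacement mktimestamp helper above:

mktimestamp(83.456)   # -> "00:01:23.456"
mktimestamp(3605.04)  # -> "01:00:05.040"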
def get_all_azure_voices(filter_locals=None) -> list[str]:
    if filter_locals is None:
        filter_locals = ["zh-CN", "en-US", "zh-HK", "zh-TW", "vi-VN"]
@@ -1035,11 +1058,39 @@ def is_azure_v2_voice(voice_name: str):
    return ""


def should_use_azure_speech_services(voice_name: str) -> bool:
    """判断音色是否应该使用Azure Speech Services"""
    if not voice_name or is_soulvoice_voice(voice_name):
        return False

    voice_name = voice_name.strip()

    # 如果是带-V2后缀的,肯定是Azure Speech Services
    if voice_name.endswith("-V2"):
        return True

    # 检查是否为Azure官方音色格式 (如: zh-CN-YunzeNeural)
    # Azure音色通常格式为: [语言]-[地区]-[名称]Neural
    import re
    pattern = r'^[a-z]{2}-[A-Z]{2}-\w+Neural$'
    if re.match(pattern, voice_name):
        return True

    return False
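A few illustrative inputs for the routing check above (behaviour follows directly from the prefix/suffix tests and the regex; the last two strings are taken from elsewhere in this diff):

should_use_azure_speech_services("zh-CN-YunzeNeural")          # True  - matches the Azure Neural pattern
should_use_azure_speech_services("zh-CN-YunzeNeural-V2")       # True  - "-V2" suffix
should_use_azure_speech_services("speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr")  # False - SoulVoice voice
should_use_azure_speech_services("zh-CN-XiaoyiNeural-Female")  # False - extra "-Female" suffix, stays on Edge TTS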
def tts(
        text: str, voice_name: str, voice_rate: float, voice_pitch: float, voice_file: str
) -> Union[SubMaker, None]:
    if is_azure_v2_voice(voice_name):
    # 检查是否为 SoulVoice 引擎
    if is_soulvoice_voice(voice_name):
        return soulvoice_tts(text, voice_name, voice_file, speed=voice_rate)

    # 检查是否应该使用 Azure Speech Services
    if should_use_azure_speech_services(voice_name):
        return azure_tts_v2(text, voice_name, voice_file)

    # 默认使用 Edge TTS (Azure V1)
    return azure_tts_v1(text, voice_name, voice_rate, voice_pitch, voice_file)


@@ -1110,12 +1161,22 @@ def azure_tts_v1(


def azure_tts_v2(text: str, voice_name: str, voice_file: str) -> Union[SubMaker, None]:
    voice_name = is_azure_v2_voice(voice_name)
    if not voice_name:
        logger.error(f"invalid voice name: {voice_name}")
        raise ValueError(f"invalid voice name: {voice_name}")
    # 直接使用官方音色名称,不需要V2后缀验证
    # Azure Speech Services 的音色名称如: zh-CN-YunzeNeural, en-US-AvaMultilingualNeural
    processed_voice_name = voice_name.strip()
    if not processed_voice_name:
        logger.error(f"invalid voice name: {voice_name} (empty)")
        raise ValueError(f"invalid voice name: {voice_name} (empty)")
    text = text.strip()

    # 检查Azure Speech SDK是否可用
    try:
        import azure.cognitiveservices.speech as speechsdk
    except ImportError as e:
        logger.error("Azure Speech SDK 未安装。请运行: pip install azure-cognitiveservices-speech")
        logger.error("或者使用 Edge TTS 引擎作为替代方案")
        return None

    def _format_duration_to_offset(duration) -> int:
        if isinstance(duration, str):
            time_obj = datetime.strptime(duration, "%H:%M:%S.%f")
@@ -1134,9 +1195,7 @@ def azure_tts_v2(text: str, voice_name: str, voice_file: str) -> Union[SubMaker,

    for i in range(3):
        try:
            logger.info(f"start, voice name: {voice_name}, try: {i + 1}")

            import azure.cognitiveservices.speech as speechsdk
            logger.info(f"start, voice name: {processed_voice_name}, try: {i + 1}")

            sub_maker = SubMaker()

@@ -1155,7 +1214,7 @@ def azure_tts_v2(text: str, voice_name: str, voice_file: str) -> Union[SubMaker,
            speech_config = speechsdk.SpeechConfig(
                subscription=speech_key, region=service_region
            )
            speech_config.speech_synthesis_voice_name = voice_name
            speech_config.speech_synthesis_voice_name = processed_voice_name
            # speech_config.set_property(property_id=speechsdk.PropertyId.SpeechServiceResponse_RequestSentenceBoundary,
            #                            value='true')
            speech_config.set_property(
@@ -1368,6 +1427,10 @@ def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str)
            if start_time < 0:
                start_time = _start_time

            # 将 100纳秒单位转换为秒
            start_time_seconds = start_time / 10000000
            end_time_seconds = end_time / 10000000

            sub = unescape(sub)
            sub_line += sub
            sub_text = match_line(sub_line, sub_index)
@@ -1375,8 +1438,8 @@ def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str)
                sub_index += 1
                line = formatter(
                    idx=sub_index,
                    start_time=start_time,
                    end_time=end_time,
                    start_time=start_time_seconds,
                    end_time=end_time_seconds,
                    sub_text=sub_text,
                )
                sub_items.append(line)
@@ -1402,9 +1465,13 @@ def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str)
                f"\nsub_items:{json.dumps(sub_items, indent=4, ensure_ascii=False)}"
                f"\nscript_lines:{json.dumps(script_lines, indent=4, ensure_ascii=False)}"
            )
            # 返回默认值,避免 None 错误
            return subtitle_file, 3.0

    except Exception as e:
        logger.error(f"failed, error: {str(e)}")
        # 返回默认值,避免 None 错误
        return subtitle_file, 3.0


def get_audio_duration(sub_maker: submaker.SubMaker):
@@ -1453,8 +1520,21 @@ def tts_multiple(task_id: str, list_script: list, voice_name: str, voice_rate: f
                           f"或者使用其他 tts 引擎")
            continue
        else:
            # 为当前片段生成字幕文件
            _, duration = create_subtitle(sub_maker=sub_maker, text=text, subtitle_file=subtitle_file)
            # SoulVoice 引擎不生成字幕文件
            if is_soulvoice_voice(voice_name):
                # 获取实际音频文件的时长
                duration = get_audio_duration_from_file(audio_file)
                if duration <= 0:
                    # 如果无法获取文件时长,尝试从 SubMaker 获取
                    duration = get_audio_duration(sub_maker)
                    if duration <= 0:
                        # 最后的 fallback,基于文本长度估算
                        duration = max(1.0, len(text) / 3.0)
                        logger.warning(f"无法获取音频时长,使用文本估算: {duration:.2f}秒")
                # 不创建字幕文件
                subtitle_file = ""
            else:
                _, duration = create_subtitle(sub_maker=sub_maker, text=text, subtitle_file=subtitle_file)

            tts_results.append({
                "_id": item['_id'],
@@ -1467,3 +1547,168 @@ def tts_multiple(task_id: str, list_script: list, voice_name: str, voice_rate: f
            logger.info(f"已生成音频文件: {audio_file}")

    return tts_results


def get_audio_duration_from_file(audio_file: str) -> float:
    """
    获取音频文件的时长(秒)
    """
    if MOVIEPY_AVAILABLE:
        try:
            audio_clip = AudioFileClip(audio_file)
            duration = audio_clip.duration
            audio_clip.close()
            return duration
        except Exception as e:
            logger.error(f"使用 moviepy 获取音频时长失败: {str(e)}")

    # Fallback: 使用更准确的估算方法
    try:
        import os
        file_size = os.path.getsize(audio_file)

        # 更准确的 MP3 时长估算
        # 假设 MP3 平均比特率为 128kbps = 16KB/s
        # 但实际文件还包含头部信息,所以调整系数
        estimated_duration = max(1.0, file_size / 20000)  # 调整为更保守的估算

        # 对于中文语音,根据文本长度进行二次校正
        # 一般中文语音速度约为 3-4 字/秒
        logger.warning(f"使用文件大小估算音频时长: {estimated_duration:.2f}秒")
        return estimated_duration
    except Exception as e:
        logger.error(f"获取音频时长失败: {str(e)}")
        # 如果所有方法都失败,返回一个基于文本长度的估算
        return 3.0  # 默认3秒,避免返回0


def is_soulvoice_voice(voice_name: str) -> bool:
    """
    检查是否为 SoulVoice 语音
    """
    return voice_name.startswith("soulvoice:") or voice_name.startswith("speech:")


def parse_soulvoice_voice(voice_name: str) -> str:
    """
    解析 SoulVoice 语音名称
    支持格式:
    - soulvoice:speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr
    - speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr
    """
    if voice_name.startswith("soulvoice:"):
        return voice_name[10:]  # 移除 "soulvoice:" 前缀
    return voice_name


def soulvoice_tts(text: str, voice_name: str, voice_file: str, speed: float = 1.0) -> Union[SubMaker, None]:
    """
    使用 SoulVoice API 进行文本转语音

    Args:
        text: 要转换的文本
        voice_name: 语音名称
        voice_file: 输出音频文件路径
        speed: 语音速度

    Returns:
        SubMaker: 包含时间戳信息的字幕制作器,失败时返回 None
    """
    # 获取配置
    api_key = config.soulvoice.get("api_key", "")
    api_url = config.soulvoice.get("api_url", "https://tts.scsmtech.cn/tts")
    default_model = config.soulvoice.get("model", "FunAudioLLM/CosyVoice2-0.5B")

    if not api_key:
        logger.error("SoulVoice API key 未配置")
        return None

    # 解析语音名称
    parsed_voice = parse_soulvoice_voice(voice_name)

    # 准备请求数据
    headers = {
        'Authorization': f'Bearer {api_key}',
        'Content-Type': 'application/json'
    }

    data = {
        'text': text.strip(),
        'model': default_model,
        'voice': parsed_voice,
        'speed': speed
    }

    # 重试机制
    for attempt in range(3):
        try:
            logger.info(f"第 {attempt + 1} 次调用 SoulVoice API")

            # 设置代理
            proxies = {}
            if config.proxy.get("http"):
                proxies = {
                    'http': config.proxy.get("http"),
                    'https': config.proxy.get("https", config.proxy.get("http"))
                }

            # 调用 API
            response = requests.post(
                api_url,
                headers=headers,
                json=data,
                proxies=proxies,
                timeout=60
            )

            if response.status_code == 200:
                # 保存音频文件
                with open(voice_file, 'wb') as f:
                    f.write(response.content)

                logger.info(f"SoulVoice TTS 成功生成音频: {voice_file}")

                # SoulVoice 不支持精确字幕生成,返回简单的 SubMaker 对象
                sub_maker = SubMaker()
                sub_maker.subs = [text]  # 整个文本作为一个段落
                sub_maker.offset = [(0, 0)]  # 占位时间戳

                return sub_maker

            else:
                logger.error(f"SoulVoice API 调用失败: {response.status_code} - {response.text}")

        except requests.exceptions.Timeout:
            logger.error(f"SoulVoice API 调用超时 (尝试 {attempt + 1}/3)")
        except requests.exceptions.RequestException as e:
            logger.error(f"SoulVoice API 网络错误: {str(e)} (尝试 {attempt + 1}/3)")
        except Exception as e:
            logger.error(f"SoulVoice TTS 处理错误: {str(e)} (尝试 {attempt + 1}/3)")

        if attempt < 2:  # 不是最后一次尝试
            time.sleep(2)  # 等待2秒后重试

    logger.error("SoulVoice TTS 生成失败,已达到最大重试次数")
    return None


def is_soulvoice_voice(voice_name: str) -> bool:
    """
    检查是否为 SoulVoice 语音
    """
    return voice_name.startswith("soulvoice:") or voice_name.startswith("speech:")


def parse_soulvoice_voice(voice_name: str) -> str:
    """
    解析 SoulVoice 语音名称
    支持格式:
    - soulvoice:speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr
    - speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr
    """
    if voice_name.startswith("soulvoice:"):
        return voice_name[10:]  # 移除 "soulvoice:" 前缀
    return voice_name
@@ -1,4 +1,5 @@
import json
import re
from typing import Dict, Any

def check_format(script_content: str) -> Dict[str, Any]:
@@ -6,76 +7,104 @@ def check_format(script_content: str) -> Dict[str, Any]:
    Args:
        script_content: 脚本内容
    Returns:
        Dict: {'success': bool, 'message': str}
        Dict: {'success': bool, 'message': str, 'details': str}
    """
    try:
        # 检查是否为有效的JSON
        data = json.loads(script_content)

        # 检查是否为列表
        if not isinstance(data, list):
            return {
                'success': False,
                'message': '脚本必须是JSON数组格式'
                'message': '脚本必须是JSON数组格式',
                'details': '正确格式应该是: [{"_id": 1, "timestamp": "...", ...}, ...]'
            }

        # 检查数组不能为空
        if len(data) == 0:
            return {
                'success': False,
                'message': '脚本数组不能为空',
                'details': '至少需要包含一个脚本片段'
            }

        # 检查每个片段
        for i, clip in enumerate(data):
            # 检查是否为对象类型
            if not isinstance(clip, dict):
                return {
                    'success': False,
                    'message': f'第{i+1}个元素必须是对象类型',
                    'details': f'当前类型: {type(clip).__name__}'
                }

            # 检查必需字段
            required_fields = ['narration', 'picture', 'timestamp']
            required_fields = ['_id', 'timestamp', 'picture', 'narration', 'OST']
            for field in required_fields:
                if field not in clip:
                    return {
                        'success': False,
                        'message': f'第{i+1}个片段缺少必需字段: {field}'
                        'message': f'第{i+1}个片段缺少必需字段: {field}',
                        'details': f'必需字段: {", ".join(required_fields)}'
                    }

            # 检查字段类型
            if not isinstance(clip['narration'], str):

            # 验证 _id 字段
            if not isinstance(clip['_id'], int) or clip['_id'] <= 0:
                return {
                    'success': False,
                    'message': f'第{i+1}个片段的narration必须是字符串'
                    'message': f'第{i+1}个片段的_id必须是正整数',
                    'details': f'当前值: {clip["_id"]} (类型: {type(clip["_id"]).__name__})'
                }
            if not isinstance(clip['picture'], str):

            # 验证 timestamp 字段格式
            timestamp_pattern = r'^\d{2}:\d{2}:\d{2},\d{3}-\d{2}:\d{2}:\d{2},\d{3}$'
            if not isinstance(clip['timestamp'], str) or not re.match(timestamp_pattern, clip['timestamp']):
                return {
                    'success': False,
                    'message': f'第{i+1}个片段的picture必须是字符串'
                    'message': f'第{i+1}个片段的timestamp格式错误',
                    'details': f'正确格式: "HH:MM:SS,mmm-HH:MM:SS,mmm",示例: "00:00:00,600-00:00:07,559"'
                }
            if not isinstance(clip['timestamp'], str):

            # 验证 picture 字段
            if not isinstance(clip['picture'], str) or not clip['picture'].strip():
                return {
                    'success': False,
                    'message': f'第{i+1}个片段的timestamp必须是字符串'
                    'message': f'第{i+1}个片段的picture必须是非空字符串',
                    'details': f'当前值: {clip.get("picture", "未定义")}'
                }

            # 检查字段内容不能为空
            if not clip['narration'].strip():

            # 验证 narration 字段
            if not isinstance(clip['narration'], str) or not clip['narration'].strip():
                return {
                    'success': False,
                    'message': f'第{i+1}个片段的narration不能为空'
                    'message': f'第{i+1}个片段的narration必须是非空字符串',
                    'details': f'当前值: {clip.get("narration", "未定义")}'
                }
            if not clip['picture'].strip():

            # 验证 OST 字段
            if not isinstance(clip['OST'], int):
                return {
                    'success': False,
                    'message': f'第{i+1}个片段的picture不能为空'
                }
            if not clip['timestamp'].strip():
                return {
                    'success': False,
                    'message': f'第{i+1}个片段的timestamp不能为空'
                    'message': f'第{i+1}个片段的OST必须是整数',
                    'details': f'当前值: {clip["OST"]} (类型: {type(clip["OST"]).__name__}),常用值: 0, 1, 2'
                }

        return {
            'success': True,
            'message': '脚本格式检查通过'
            'message': '脚本格式检查通过',
            'details': f'共验证 {len(data)} 个脚本片段,格式正确'
        }

    except json.JSONDecodeError as e:
        return {
            'success': False,
            'message': f'JSON格式错误: {str(e)}'
            'message': f'JSON格式错误: {str(e)}',
            'details': '请检查JSON语法,确保所有括号、引号、逗号正确'
        }
    except Exception as e:
        return {
            'success': False,
            'message': f'检查过程中发生错误: {str(e)}'
            'message': f'检查过程中发生错误: {str(e)}',
            'details': '请联系技术支持'
        }
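A minimal script that should satisfy the new validator (all five required fields present, timestamp in "HH:MM:SS,mmm-HH:MM:SS,mmm" form, OST an integer; example values are illustrative):

import json

valid_script = [
    {
        "_id": 1,
        "timestamp": "00:00:00,600-00:00:07,559",
        "picture": "主角走进房间",
        "narration": "故事从这里开始",
        "OST": 0
    }
]
print(check_format(json.dumps(valid_script, ensure_ascii=False)))
# -> {'success': True, 'message': '脚本格式检查通过', 'details': '共验证 1 个脚本片段,格式正确'}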
@ -509,6 +509,12 @@ def clean_model_output(output):
|
||||
|
||||
|
||||
def cut_video(params, progress_callback=None):
|
||||
"""
|
||||
旧的视频裁剪函数 - 已弃用
|
||||
|
||||
注意:此函数已被统一裁剪策略取代,不再推荐使用。
|
||||
新的实现请使用 task.start_subclip_unified() 函数。
|
||||
"""
|
||||
try:
|
||||
task_id = str(uuid4())
|
||||
st.session_state['task_id'] = task_id
|
||||
|
||||
17
changelog.py
17
changelog.py
@ -1,17 +0,0 @@
|
||||
from git_changelog.cli import build_and_render
|
||||
|
||||
# 运行这段脚本自动生成CHANGELOG.md文件
|
||||
|
||||
build_and_render(
|
||||
repository=".",
|
||||
output="CHANGELOG.md",
|
||||
convention="angular",
|
||||
provider="github",
|
||||
template="keepachangelog",
|
||||
parse_trailers=True,
|
||||
parse_refs=False,
|
||||
sections=["build", "deps", "feat", "fix", "refactor"],
|
||||
versioning="pep440",
|
||||
bump="1.1.2", # 指定bump版本
|
||||
in_place=True,
|
||||
)
|
||||
Binary file not shown.
@ -1,5 +1,19 @@
|
||||
[app]
|
||||
project_version="0.6.8"
|
||||
project_version="0.7.0"
|
||||
|
||||
# 模型验证模式配置
|
||||
# true: 严格模式,只允许使用预定义支持列表中的模型(默认)
|
||||
# false: 宽松模式,允许使用任何模型名称,仅记录警告
|
||||
strict_model_validation = true
|
||||
|
||||
# LLM API 超时配置(秒)
|
||||
# 视觉模型基础超时时间
|
||||
llm_vision_timeout = 120
|
||||
# 文本模型基础超时时间(解说文案生成等复杂任务需要更长时间)
|
||||
llm_text_timeout = 180
|
||||
# API 重试次数
|
||||
llm_max_retries = 3
|
||||
|
||||
# 支持视频理解的大模型提供商
|
||||
# gemini (谷歌, 需要 VPN)
|
||||
# siliconflow (硅基流动)
|
||||
@ -77,6 +91,37 @@
|
||||
# webui界面是否显示配置项
|
||||
hide_config = true
|
||||
|
||||
[azure]
|
||||
# Azure TTS 配置
|
||||
speech_key = ""
|
||||
speech_region = ""
|
||||
|
||||
[soulvoice]
|
||||
# SoulVoice TTS API 密钥
|
||||
api_key = ""
|
||||
# 音色 URI(必需)
|
||||
voice_uri = "speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr"
|
||||
# API 接口地址(可选,默认值如下)
|
||||
api_url = "https://tts.scsmtech.cn/tts"
|
||||
# 默认模型(可选)
|
||||
model = "FunAudioLLM/CosyVoice2-0.5B"
|
||||
|
||||
[ui]
|
||||
# TTS引擎选择 (edge_tts, azure_speech, soulvoice)
|
||||
tts_engine = "edge_tts"
|
||||
|
||||
# Edge TTS 配置
|
||||
edge_voice_name = "zh-CN-XiaoyiNeural-Female"
|
||||
edge_volume = 80
|
||||
edge_rate = 1.0
|
||||
edge_pitch = 0
|
||||
|
||||
# Azure Speech Services 配置
|
||||
azure_voice_name = "zh-CN-XiaoyiNeural-Female"
|
||||
azure_volume = 80
|
||||
azure_rate = 1.0
|
||||
azure_pitch = 0
|
||||
|
||||
[proxy]
|
||||
# clash 默认地址:http://127.0.0.1:7890
|
||||
http = ""
|
||||
|
||||
19
main.py
19
main.py
@ -1,19 +0,0 @@
|
||||
import os
|
||||
import uvicorn
|
||||
from loguru import logger
|
||||
|
||||
from app.config import config
|
||||
|
||||
if __name__ == "__main__":
|
||||
logger.info(
|
||||
"start server, docs: http://127.0.0.1:" + str(config.listen_port) + "/docs"
|
||||
)
|
||||
os.environ["HTTP_PROXY"] = config.proxy.get("http")
|
||||
os.environ["HTTPS_PROXY"] = config.proxy.get("https")
|
||||
uvicorn.run(
|
||||
app="app.asgi:app",
|
||||
host=config.listen_host,
|
||||
port=config.listen_port,
|
||||
reload=config.reload_debug,
|
||||
log_level="warning",
|
||||
)
|
||||
@ -1 +1 @@
|
||||
0.6.8
|
||||
0.7.0
|
||||
@ -1,17 +0,0 @@
|
||||
# Release Notes
|
||||
|
||||
## Latest Changes
|
||||
|
||||
* docs(README): 更新README. PR [#138](https://github.com/linyqh/NarratoAI/pull/138) by [@linyqh](https://github.com/linyqh).
|
||||
* Dev 0.6.0. PR [#137](https://github.com/linyqh/NarratoAI/pull/137) by [@linyqh](https://github.com/linyqh).
|
||||
* Dev 0.6.0 . PR [#134](https://github.com/linyqh/NarratoAI/pull/134) by [@linyqh](https://github.com/linyqh).
|
||||
* Dev-0.3.9. PR [#73](https://github.com/linyqh/NarratoAI/pull/73) by [@linyqh](https://github.com/linyqh).
|
||||
* 0.3.9 版本发布. PR [#71](https://github.com/linyqh/NarratoAI/pull/71) by [@linyqh](https://github.com/linyqh).
|
||||
* docs: add Japanese README. PR [#66](https://github.com/linyqh/NarratoAI/pull/66) by [@eltociear](https://github.com/eltociear).
|
||||
* docs: 测试 release 2. PR [#62](https://github.com/linyqh/NarratoAI/pull/62) by [@linyqh](https://github.com/linyqh).
|
||||
* docs: 测试 release. PR [#61](https://github.com/linyqh/NarratoAI/pull/61) by [@linyqh](https://github.com/linyqh).
|
||||
* docs: 测试commit. PR [#60](https://github.com/linyqh/NarratoAI/pull/60) by [@linyqh](https://github.com/linyqh).
|
||||
* Dev. PR [#59](https://github.com/linyqh/NarratoAI/pull/59) by [@linyqh](https://github.com/linyqh).
|
||||
* 0.2.0新版预发布. PR [#37](https://github.com/linyqh/NarratoAI/pull/37) by [@linyqh](https://github.com/linyqh).
|
||||
* v0.3.6. PR [#58](https://github.com/linyqh/NarratoAI/pull/58) by [@linyqh](https://github.com/linyqh).
|
||||
* 0.3.4 修改各种bug. PR [#49](https://github.com/linyqh/NarratoAI/pull/49) by [@linyqh](https://github.com/linyqh).
|
||||
@ -11,6 +11,7 @@ pysrt==1.1.2
|
||||
|
||||
openai~=1.77.0
|
||||
google-generativeai>=0.8.5
|
||||
azure-cognitiveservices-speech~=1.37.0
|
||||
|
||||
# 待优化项
|
||||
# opencv-python==4.11.0.86
|
||||
@ -29,7 +30,6 @@ google-generativeai>=0.8.5
|
||||
# python-multipart~=0.0.9
|
||||
# redis==5.0.3
|
||||
# opencv-python~=4.10.0.84
|
||||
# azure-cognitiveservices-speech~=1.37.0
|
||||
# git-changelog~=2.5.2
|
||||
# watchdog==5.0.2
|
||||
# pydub==0.25.1
|
||||
|
||||
88
start.bat
88
start.bat
@ -1,88 +0,0 @@
|
||||
@echo off
|
||||
:: 设置控制台代码页为UTF-8,解决中文显示问题
|
||||
chcp 65001 >nul
|
||||
:: 关闭命令回显,使脚本运行时更整洁
|
||||
|
||||
:: 获取当前脚本所在目录路径并存储在变量中
|
||||
set "CURRENT_DIR=%~dp0"
|
||||
echo ***** 当前工作目录: %CURRENT_DIR% *****
|
||||
|
||||
:: ==================== FFmpeg 配置 ====================
|
||||
:: 设置 FFmpeg 可执行文件的完整路径
|
||||
set "FFMPEG_BINARY=%CURRENT_DIR%lib\ffmpeg\ffmpeg-7.0-essentials_build\ffmpeg.exe"
|
||||
set "FFMPEG_PATH=%CURRENT_DIR%lib\ffmpeg\ffmpeg-7.0-essentials_build"
|
||||
echo ***** FFmpeg 执行文件路径: %FFMPEG_BINARY% *****
|
||||
|
||||
:: 将 FFmpeg 目录添加到系统 PATH 环境变量,使其可以在命令行中直接调用
|
||||
set "PATH=%FFMPEG_PATH%;%PATH%"
|
||||
|
||||
:: ==================== ImageMagick 配置 ====================
|
||||
:: 设置 ImageMagick 可执行文件的完整路径(用于图像处理)
|
||||
set "IMAGEMAGICK_BINARY=%CURRENT_DIR%lib\imagemagic\ImageMagick-7.1.1-29-portable-Q16-x64\magick.exe"
|
||||
set "IMAGEMAGICK_PATH=%CURRENT_DIR%lib\imagemagic\ImageMagick-7.1.1-29-portable-Q16-x64"
|
||||
echo ***** ImageMagick 执行文件路径: %IMAGEMAGICK_BINARY% *****
|
||||
|
||||
:: 将 ImageMagick 目录添加到系统 PATH 环境变量
|
||||
set "PATH=%IMAGEMAGICK_PATH%;%PATH%"
|
||||
|
||||
:: ==================== Python 环境配置 ====================
|
||||
:: 设置 Python 模块搜索路径,确保能够正确导入项目模块
|
||||
set "PYTHONPATH=%CURRENT_DIR%NarratoAI;%PYTHONPATH%"
|
||||
echo ***** Python模块搜索路径: %PYTHONPATH% *****
|
||||
|
||||
:: ==================== 项目特定环境变量配置 ====================
|
||||
:: 设置项目根目录和依赖工具的路径,供应用程序内部使用
|
||||
set "NARRATO_ROOT=%CURRENT_DIR%NarratoAI"
|
||||
set "NARRATO_FFMPEG=%FFMPEG_BINARY%"
|
||||
set "NARRATO_IMAGEMAGICK=%IMAGEMAGICK_BINARY%"
|
||||
|
||||
:: ==================== Streamlit 配置 ====================
|
||||
:: 设置 Streamlit(Python Web应用框架)的配置文件路径
|
||||
set "USER_HOME=%USERPROFILE%"
|
||||
set "STREAMLIT_DIR=%USER_HOME%\.streamlit"
|
||||
set "CREDENTIAL_FILE=%STREAMLIT_DIR%\credentials.toml"
|
||||
echo ***** Streamlit 凭证文件路径: %CREDENTIAL_FILE% *****
|
||||
|
||||
:: 检查并创建 Streamlit 配置目录和凭证文件(如果不存在)
|
||||
if not exist "%STREAMLIT_DIR%" (
|
||||
echo 创建 Streamlit 配置目录...
|
||||
mkdir "%STREAMLIT_DIR%"
|
||||
(
|
||||
echo [general]
|
||||
echo email=""
|
||||
) > "%CREDENTIAL_FILE%"
|
||||
echo Streamlit 配置文件已创建!
|
||||
)
|
||||
|
||||
:: ==================== 依赖检查 ====================
|
||||
:: 验证必要的外部工具是否存在,确保应用可以正常运行
|
||||
if not exist "%FFMPEG_BINARY%" (
|
||||
echo 错误: 未找到 FFmpeg 执行文件,路径: %FFMPEG_BINARY%
|
||||
echo 请确保已正确安装 FFmpeg 或检查路径配置
|
||||
pause
|
||||
exit /b 1
|
||||
)
|
||||
|
||||
if not exist "%IMAGEMAGICK_BINARY%" (
|
||||
echo 错误: 未找到 ImageMagick 执行文件,路径: %IMAGEMAGICK_BINARY%
|
||||
echo 请确保已正确安装 ImageMagick 或检查路径配置
|
||||
pause
|
||||
exit /b 1
|
||||
)
|
||||
|
||||
:: ==================== 启动应用 ====================
|
||||
:: 切换到项目目录并启动应用
|
||||
echo ***** 切换工作目录到: %CURRENT_DIR%NarratoAI *****
|
||||
cd /d "%CURRENT_DIR%NarratoAI"
|
||||
|
||||
echo ***** 正在启动 NarratoAI 应用... *****
|
||||
:: 使用项目自带的Python解释器启动Streamlit应用
|
||||
"%CURRENT_DIR%lib\python\python.exe" -m streamlit run webui.py --browser.serverAddress="127.0.0.1" --server.enableCORS=True --server.maxUploadSize=2048 --browser.gatherUsageStats=False
|
||||
:: 参数说明:
|
||||
:: --browser.serverAddress="127.0.0.1" - 将服务器绑定到本地地址
|
||||
:: --server.enableCORS=True - 启用跨域资源共享
|
||||
:: --server.maxUploadSize=2048 - 设置最大上传文件大小为2048MB
|
||||
:: --browser.gatherUsageStats=False - 禁用使用统计收集
|
||||
|
||||
:: 应用关闭后暂停,让用户看到最终输出
|
||||
pause
|
||||
update.bat
@ -1,112 +0,0 @@
@echo off
|
||||
chcp 65001 >nul
|
||||
setlocal EnableDelayedExpansion
|
||||
set "CURRENT_DIR=%~dp0"
|
||||
echo ***** 当前目录: %CURRENT_DIR% *****
|
||||
|
||||
REM 清除可能影响的环境变量
|
||||
set PYTHONPATH=
|
||||
set PYTHONHOME=
|
||||
|
||||
REM 初始化代理设置为空
|
||||
set "HTTP_PROXY="
|
||||
set "HTTPS_PROXY="
|
||||
|
||||
:git_pull
|
||||
echo 正在更新代码,请稍候...
|
||||
REM 使用git更新代码并检查是否成功
|
||||
"%CURRENT_DIR%lib\git\bin\git.exe" -C "%CURRENT_DIR%NarratoAI" pull > "%TEMP%\git_output.txt" 2>&1
|
||||
set GIT_EXIT_CODE=%ERRORLEVEL%
|
||||
|
||||
if %GIT_EXIT_CODE% NEQ 0 (
|
||||
echo [错误] 代码更新失败!错误代码: %GIT_EXIT_CODE%
|
||||
type "%TEMP%\git_output.txt"
|
||||
|
||||
findstr /C:"error: 403" /C:"fatal: unable to access" /C:"The requested URL returned error: 403" "%TEMP%\git_output.txt" >nul
|
||||
if !ERRORLEVEL! EQU 0 (
|
||||
echo.
|
||||
echo [提示] 检测到 GitHub 403 错误,可能是由于网络问题导致。
|
||||
|
||||
if not defined HTTP_PROXY (
|
||||
echo.
|
||||
echo 请输入代理地址(例如 http://127.0.0.1:7890),或直接按回车跳过:
|
||||
set /p PROXY_INPUT="> "
|
||||
|
||||
if not "!PROXY_INPUT!"=="" (
|
||||
set "HTTP_PROXY=!PROXY_INPUT!"
|
||||
set "HTTPS_PROXY=!PROXY_INPUT!"
|
||||
echo.
|
||||
echo [信息] 已设置代理: !PROXY_INPUT!
|
||||
echo 正在使用代理重试...
|
||||
goto git_pull
|
||||
) else (
|
||||
echo.
|
||||
echo [警告] 未设置代理,建议:
|
||||
echo - 手动设置系统代理
|
||||
echo - 使用VPN或其他网络工具
|
||||
echo - 重新运行此脚本并输入代理地址
|
||||
)
|
||||
) else (
|
||||
echo.
|
||||
echo [警告] 使用代理 !HTTP_PROXY! 仍然失败。
|
||||
echo 您可以:
|
||||
echo 1. 输入新的代理地址(或直接按回车使用当前代理: !HTTP_PROXY!)
|
||||
echo 2. 输入 "clear" 清除代理设置
|
||||
set /p PROXY_INPUT="> "
|
||||
|
||||
if "!PROXY_INPUT!"=="clear" (
|
||||
set "HTTP_PROXY="
|
||||
set "HTTPS_PROXY="
|
||||
echo [信息] 已清除代理设置
|
||||
goto end
|
||||
) else if not "!PROXY_INPUT!"=="" (
|
||||
set "HTTP_PROXY=!PROXY_INPUT!"
|
||||
set "HTTPS_PROXY=!PROXY_INPUT!"
|
||||
echo [信息] 已更新代理为: !PROXY_INPUT!
|
||||
echo 正在使用新代理重试...
|
||||
goto git_pull
|
||||
) else (
|
||||
echo [信息] 保持当前代理: !HTTP_PROXY!
|
||||
echo 您可以稍后再次尝试或手动解决网络问题
|
||||
)
|
||||
)
|
||||
) else (
|
||||
echo.
|
||||
echo [警告] 遇到其他错误,请检查输出信息以获取更多详情。
|
||||
)
|
||||
goto end
|
||||
) else (
|
||||
echo [成功] 代码已成功更新!
|
||||
)
|
||||
|
||||
echo 正在更新pip,请稍候...
|
||||
"%CURRENT_DIR%lib\python\python.exe" -m pip install --upgrade pip >nul 2>&1
|
||||
if %ERRORLEVEL% NEQ 0 (
|
||||
echo [警告] pip更新失败,将继续使用当前版本。
|
||||
) else (
|
||||
echo [成功] pip已更新至最新版本!
|
||||
)
|
||||
|
||||
echo 正在安装依赖,请稍候...
|
||||
REM 确保使用正确的Python和pip
|
||||
"%CURRENT_DIR%lib\python\python.exe" -m pip install -q -r "%CURRENT_DIR%NarratoAI\requirements.txt"
|
||||
if %ERRORLEVEL% NEQ 0 (
|
||||
echo [错误] 依赖安装失败!请检查requirements.txt文件是否存在。
|
||||
goto end
|
||||
) else (
|
||||
echo [成功] 依赖安装完成!
|
||||
)
|
||||
|
||||
echo ===================================
|
||||
echo ✓ 程序更新已完成
|
||||
echo ===================================
|
||||
|
||||
:end
|
||||
if exist "%TEMP%\git_output.txt" del "%TEMP%\git_output.txt"
|
||||
REM 清除设置的代理环境变量
|
||||
if defined HTTP_PROXY (
|
||||
echo [信息] 本次会话的代理设置已清除
|
||||
set "HTTP_PROXY="
|
||||
set "HTTPS_PROXY="
|
||||
)
|
||||
pause
|
||||
@ -1,178 +0,0 @@
|
||||
import requests
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
from typing import Dict, Any
|
||||
|
||||
class VideoPipeline:
|
||||
def __init__(self, base_url: str = "http://127.0.0.1:8080"):
|
||||
self.base_url = base_url
|
||||
|
||||
def download_video(self, url: str, resolution: str = "1080p",
|
||||
output_format: str = "mp4", rename: str = None) -> Dict[str, Any]:
|
||||
"""下载视频的第一步"""
|
||||
endpoint = f"{self.base_url}/api/v2/youtube/download"
|
||||
payload = {
|
||||
"url": url,
|
||||
"resolution": resolution,
|
||||
"output_format": output_format,
|
||||
"rename": rename or time.strftime("%Y-%m-%d")
|
||||
}
|
||||
|
||||
response = requests.post(endpoint, json=payload)
|
||||
response.raise_for_status()
|
||||
return response.json()
|
||||
|
||||
def generate_script(self, video_path: str, skip_seconds: int = 0,
|
||||
threshold: int = 30, vision_batch_size: int = 10,
|
||||
vision_llm_provider: str = "gemini") -> Dict[str, Any]:
|
||||
"""生成脚本的第二步"""
|
||||
endpoint = f"{self.base_url}/api/v2/scripts/generate"
|
||||
payload = {
|
||||
"video_path": video_path,
|
||||
"skip_seconds": skip_seconds,
|
||||
"threshold": threshold,
|
||||
"vision_batch_size": vision_batch_size,
|
||||
"vision_llm_provider": vision_llm_provider
|
||||
}
|
||||
|
||||
response = requests.post(endpoint, json=payload)
|
||||
response.raise_for_status()
|
||||
return response.json()
|
||||
|
||||
def crop_video(self, video_path: str, script: list) -> Dict[str, Any]:
|
||||
"""剪辑视频的第三步"""
|
||||
endpoint = f"{self.base_url}/api/v2/scripts/crop"
|
||||
payload = {
|
||||
"video_origin_path": video_path,
|
||||
"video_script": script
|
||||
}
|
||||
|
||||
response = requests.post(endpoint, json=payload)
|
||||
response.raise_for_status()
|
||||
return response.json()
|
||||
|
||||
def generate_final_video(self, task_id: str, video_path: str,
|
||||
script_path: str, script: list, subclip_videos: Dict[str, str], voice_name: str) -> Dict[str, Any]:
|
||||
"""生成最终视频的第四步"""
|
||||
endpoint = f"{self.base_url}/api/v2/scripts/start-subclip"
|
||||
|
||||
request_data = {
|
||||
"video_clip_json": script,
|
||||
"video_clip_json_path": script_path,
|
||||
"video_origin_path": video_path,
|
||||
"video_aspect": "16:9",
|
||||
"video_language": "zh-CN",
|
||||
"voice_name": voice_name,
|
||||
"voice_volume": 1,
|
||||
"voice_rate": 1.2,
|
||||
"voice_pitch": 1,
|
||||
"bgm_name": "random",
|
||||
"bgm_type": "random",
|
||||
"bgm_file": "",
|
||||
"bgm_volume": 0.3,
|
||||
"subtitle_enabled": True,
|
||||
"subtitle_position": "bottom",
|
||||
"font_name": "STHeitiMedium.ttc",
|
||||
"text_fore_color": "#FFFFFF",
|
||||
"text_background_color": "transparent",
|
||||
"font_size": 75,
|
||||
"stroke_color": "#000000",
|
||||
"stroke_width": 1.5,
|
||||
"custom_position": 70,
|
||||
"n_threads": 8
|
||||
}
|
||||
|
||||
payload = {
|
||||
"request": request_data,
|
||||
"subclip_videos": subclip_videos
|
||||
}
|
||||
|
||||
params = {"task_id": task_id}
|
||||
response = requests.post(endpoint, params=params, json=payload)
|
||||
response.raise_for_status()
|
||||
return response.json()
|
||||
|
||||
def save_script_to_json(self, script: list, script_path: str) -> str:
|
||||
"""保存脚本到json文件"""
|
||||
try:
|
||||
with open(script_path, 'w', encoding='utf-8') as f:
|
||||
json.dump(script, f, ensure_ascii=False, indent=2)
|
||||
print(f"脚本已保存到: {script_path}")
|
||||
return script_path
|
||||
except Exception as e:
|
||||
print(f"保存脚本失败: {str(e)}")
|
||||
raise
|
||||
|
||||
def run_pipeline(self, task_id: str, script_name: str, youtube_url: str, video_name: str="null", skip_seconds: int = 0, threshold: int = 30, vision_batch_size: int = 10, vision_llm_provider: str = "gemini", voice_name: str = "zh-CN-YunjianNeural") -> Dict[str, Any]:
|
||||
"""运行完整的pipeline"""
|
||||
try:
|
||||
current_path = os.path.dirname(os.path.abspath(__file__))
|
||||
video_path = os.path.join(current_path, "resource", "videos", f"{video_name}.mp4")
|
||||
# 判断视频是否存在
|
||||
if not os.path.exists(video_path):
|
||||
# 1. 下载视频
|
||||
print(f"视频不存在, 开始下载视频: {video_path}")
|
||||
download_result = self.download_video(url=youtube_url, resolution="1080p", output_format="mp4", rename=video_name)
|
||||
video_path = download_result["output_path"]
|
||||
else:
|
||||
print(f"视频已存在: {video_path}")
|
||||
|
||||
# 2. 判断script_name是否存在
|
||||
# 2.1.1 拼接脚本路径 NarratoAI/resource/scripts
|
||||
script_path = os.path.join(current_path, "resource", "scripts", script_name)
|
||||
if os.path.exists(script_path):
|
||||
script = json.load(open(script_path, "r", encoding="utf-8"))
|
||||
else:
|
||||
# 2.1.2 生成脚本
|
||||
print("开始生成脚本...")
|
||||
script_result = self.generate_script(video_path=video_path, skip_seconds=skip_seconds, threshold=threshold, vision_batch_size=vision_batch_size, vision_llm_provider=vision_llm_provider)
|
||||
script = script_result["script"]
|
||||
|
||||
# 2.2 保存脚本到json文件
|
||||
print("保存脚本到json文件...")
|
||||
self.save_script_to_json(script=script, script_path=script_path)
|
||||
|
||||
# 3. 剪辑视频
|
||||
print("开始剪辑视频...")
|
||||
crop_result = self.crop_video(video_path=video_path, script=script)
|
||||
subclip_videos = crop_result["subclip_videos"]
|
||||
|
||||
# 4. 生成最终视频
|
||||
print("开始生成最终视频...")
|
||||
self.generate_final_video(
|
||||
task_id=task_id,
|
||||
video_path=video_path,
|
||||
script_path=script_path,
|
||||
script=script,
|
||||
subclip_videos=subclip_videos,
|
||||
voice_name=voice_name
|
||||
)
|
||||
|
||||
return {
|
||||
"status": "等待异步生成视频",
|
||||
"path": os.path.join(current_path, "storage", "tasks", task_id)
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
return {
|
||||
"status": "error",
|
||||
"error": str(e)
|
||||
}
|
||||
|
||||
|
||||
# 使用示例
|
||||
if __name__ == "__main__":
|
||||
pipeline = VideoPipeline()
|
||||
result = pipeline.run_pipeline(
|
||||
task_id="test_111901",
|
||||
script_name="test.json",
|
||||
youtube_url="https://www.youtube.com/watch?v=vLJ7Yed6FQ4",
|
||||
video_name="2024-11-19-01",
|
||||
skip_seconds=50,
|
||||
threshold=35,
|
||||
vision_batch_size=10,
|
||||
vision_llm_provider="gemini",
|
||||
voice_name="zh-CN-YunjianNeural",
|
||||
)
|
||||
print(result)
|
||||
webui.py
@ -4,7 +4,7 @@ import sys
from loguru import logger
from app.config import config
from webui.components import basic_settings, video_settings, audio_settings, subtitle_settings, script_settings, \
review_settings, merge_settings, system_settings
system_settings
# from webui.utils import cache, file_utils
from app.utils import utils
from app.utils import ffmpeg_utils
@ -106,8 +106,7 @@ def init_global_state():
st.session_state['video_plot'] = ''
if 'ui_language' not in st.session_state:
st.session_state['ui_language'] = config.ui.get("language", utils.get_system_locale())
if 'subclip_videos' not in st.session_state:
st.session_state['subclip_videos'] = {}
# 移除subclip_videos初始化 - 现在使用统一裁剪策略


def tr(key):
@ -136,11 +135,9 @@ def render_generate_button():
logger.add(log_received)

config.save_config()
task_id = st.session_state.get('task_id')

if not task_id:
st.error(tr("请先裁剪视频"))
return
# 移除task_id检查 - 现在使用统一裁剪策略,不再需要预裁剪
# 直接检查必要的文件是否存在
if not st.session_state.get('video_clip_json_path'):
st.error(tr("脚本文件不能为空"))
return
@ -168,10 +165,14 @@ def render_generate_button():
# 创建参数对象
params = VideoClipParams(**all_params)

result = tm.start_subclip(
# 使用新的统一裁剪策略,不再需要预裁剪的subclip_videos
# 生成一个新的task_id用于本次处理
import uuid
task_id = str(uuid.uuid4())

result = tm.start_subclip_unified(
task_id=task_id,
params=params,
subclip_path_videos=st.session_state['subclip_videos']
params=params
)

video_files = result.get("videos", [])
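The hunk above replaces the pre-cropping flow (tm.start_subclip with subclip_path_videos) with a single tm.start_subclip_unified call that only needs params and a freshly generated task id. A tiny runnable illustration of the id/path scheme, with the storage/tasks layout taken from the pipeline example later in this diff:

import os
import uuid

# Each generation run now gets its own task id; no pre-cropped task is reused.
task_id = str(uuid.uuid4())
task_dir = os.path.join("storage", "tasks", task_id)
print(f"task {task_id} -> outputs under {task_dir}")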
@ -220,22 +221,17 @@ def main():
# 首先渲染不依赖PyTorch的UI部分
# 渲染基础设置面板
basic_settings.render_basic_settings(tr)
# 渲染合并设置
merge_settings.render_merge_settings(tr)

# 渲染主面板
panel = st.columns(3)
with panel[0]:
script_settings.render_script_panel(tr)
with panel[1]:
video_settings.render_video_panel(tr)
audio_settings.render_audio_panel(tr)
with panel[2]:
video_settings.render_video_panel(tr)
subtitle_settings.render_subtitle_panel(tr)

# 渲染视频审查面板
review_settings.render_review_panel(tr)

# 放到最后渲染可能使用PyTorch的部分
# 渲染系统设置面板
with panel[2]:

@ -3,13 +3,11 @@ from .script_settings import render_script_panel
from .video_settings import render_video_panel
from .audio_settings import render_audio_panel
from .subtitle_settings import render_subtitle_panel
from .review_settings import render_review_panel

__all__ = [
'render_basic_settings',
'render_script_panel',
'render_video_panel',
'render_audio_panel',
'render_subtitle_panel',
'render_review_panel'
]
'render_subtitle_panel'
]
@ -8,6 +8,64 @@ from app.utils import utils
from webui.utils.cache import get_songs_cache


def get_soulvoice_voices():
"""获取 SoulVoice 语音列表"""
# 检查是否配置了 SoulVoice API key
api_key = config.soulvoice.get("api_key", "")
if not api_key:
return []

# 只返回一个 SoulVoice 选项,音色通过输入框自定义
return ["soulvoice:custom"]


def get_tts_engine_options():
"""获取TTS引擎选项"""
return {
"edge_tts": "Edge TTS",
"azure_speech": "Azure Speech Services",
"soulvoice": "SoulVoice"
}


def get_tts_engine_descriptions():
"""获取TTS引擎详细描述"""
return {
"edge_tts": {
"title": "Edge TTS",
"features": "完全免费,但服务稳定性一般,不支持语音克隆功能",
"use_case": "测试和轻量级使用",
"registration": None
},
"azure_speech": {
"title": "Azure Speech Services",
"features": "提供一定免费额度,超出后按量付费,需要绑定海外信用卡",
"use_case": "企业级应用,需要稳定服务",
"registration": "https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices"
},
"soulvoice": {
"title": "SoulVoice",
"features": "提供免费额度,支持语音克隆,支持微信购买额度,无需信用卡,性价比极高",
"use_case": "个人用户和中小企业,需要语音克隆功能",
"registration": "https://soulvoice.scsmtech.cn/"
}
}


def is_valid_azure_voice_name(voice_name: str) -> bool:
"""检查是否为有效的Azure音色名称格式"""
if not voice_name or not isinstance(voice_name, str):
return False

voice_name = voice_name.strip()

# Azure音色名称通常格式为: [语言]-[地区]-[名称]Neural
# 例如: zh-CN-YunzeNeural, en-US-AvaMultilingualNeural
import re
pattern = r'^[a-z]{2}-[A-Z]{2}-\w+Neural$'
return bool(re.match(pattern, voice_name))
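For reference, a quick sanity check of the validator above against voice names that appear elsewhere in this commit (an illustrative snippet, not part of the change set):

import re

pattern = r'^[a-z]{2}-[A-Z]{2}-\w+Neural$'
samples = [
    "zh-CN-YunzeNeural",              # valid Azure name -> matches
    "en-US-AvaMultilingualNeural",    # valid Azure name -> matches
    "zh-CN-XiaoxiaoNeural-Female",    # Edge-style gender suffix -> no match
    "soulvoice:custom",               # SoulVoice placeholder -> no match
]
for name in samples:
    print(name, bool(re.match(pattern, name)))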


def render_audio_panel(tr):
"""渲染音频设置面板"""
with st.container(border=True):
@ -22,37 +80,91 @@ def render_audio_panel(tr):

def render_tts_settings(tr):
"""渲染TTS(文本转语音)设置"""

# 1. TTS引擎选择器
# st.subheader("🎤 TTS引擎选择")

engine_options = get_tts_engine_options()
engine_descriptions = get_tts_engine_descriptions()

# 获取保存的TTS引擎设置
saved_tts_engine = config.ui.get("tts_engine", "edge_tts")

# 确保保存的引擎在可用选项中
if saved_tts_engine not in engine_options:
saved_tts_engine = "edge_tts"

# TTS引擎选择下拉框
selected_engine = st.selectbox(
"选择TTS引擎",
options=list(engine_options.keys()),
format_func=lambda x: engine_options[x],
index=list(engine_options.keys()).index(saved_tts_engine),
help="选择您要使用的文本转语音引擎"
)

# 保存TTS引擎选择
config.ui["tts_engine"] = selected_engine

# 2. 显示引擎详细说明
if selected_engine in engine_descriptions:
desc = engine_descriptions[selected_engine]

with st.expander(f"📋 {desc['title']} 详细说明", expanded=True):
st.markdown(f"**特点:** {desc['features']}")
st.markdown(f"**适用场景:** {desc['use_case']}")

if desc['registration']:
st.markdown(f"**注册地址:** [{desc['registration']}]({desc['registration']})")

# 3. 根据选择的引擎渲染对应的配置界面
# st.subheader("⚙️ 引擎配置")

if selected_engine == "edge_tts":
render_edge_tts_settings(tr)
elif selected_engine == "azure_speech":
render_azure_speech_settings(tr)
elif selected_engine == "soulvoice":
render_soulvoice_engine_settings(tr)

# 4. 试听功能
render_voice_preview_new(tr, selected_engine)

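The per-engine settings below persist their values under config.ui (tts_engine, edge_voice_name, azure_voice_name, voice_name, ...), which webui.py writes out via config.save_config(). A hypothetical helper sketching how downstream code might resolve the effective voice for the selected engine, using only the keys and defaults introduced in this commit (not an actual function in the codebase):

from app.config import config  # same config module imported by webui.py

def resolve_voice_name() -> str:
    """Hypothetical illustration of the config keys written by this panel."""
    engine = config.ui.get("tts_engine", "edge_tts")
    if engine == "edge_tts":
        return config.ui.get("edge_voice_name", "zh-CN-XiaoxiaoNeural-Female")
    if engine == "azure_speech":
        return config.ui.get("azure_voice_name", "zh-CN-XiaoxiaoMultilingualNeural")
    # SoulVoice stores a "soulvoice:"-prefixed URI in the shared voice_name key.
    return config.ui.get("voice_name", "")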
def render_edge_tts_settings(tr):
|
||||
"""渲染 Edge TTS 引擎设置"""
|
||||
# 获取支持的语音列表
|
||||
support_locales = ["zh-CN", "en-US"]
|
||||
voices = voice.get_all_azure_voices(filter_locals=support_locales)
|
||||
all_voices = voice.get_all_azure_voices(filter_locals=support_locales)
|
||||
|
||||
# 只保留标准版本的语音(Edge TTS专用,不包含V2)
|
||||
edge_voices = [v for v in all_voices if "-V2" not in v]
|
||||
|
||||
# 创建友好的显示名称
|
||||
friendly_names = {
|
||||
v: v.replace("Female", tr("Female"))
|
||||
.replace("Male", tr("Male"))
|
||||
.replace("Neural", "")
|
||||
for v in voices
|
||||
}
|
||||
friendly_names = {}
|
||||
for v in edge_voices:
|
||||
friendly_names[v] = v.replace("Female", tr("Female")).replace("Male", tr("Male")).replace("Neural", "")
|
||||
|
||||
# 获取保存的语音设置
|
||||
saved_voice_name = config.ui.get("voice_name", "")
|
||||
saved_voice_name_index = 0
|
||||
saved_voice_name = config.ui.get("edge_voice_name", "zh-CN-XiaoxiaoNeural-Female")
|
||||
|
||||
if saved_voice_name in friendly_names:
|
||||
saved_voice_name_index = list(friendly_names.keys()).index(saved_voice_name)
|
||||
else:
|
||||
# 如果没有保存的设置,选择与UI语言匹配的第一个语音
|
||||
for i, v in enumerate(voices):
|
||||
if (v.lower().startswith(st.session_state["ui_language"].lower())
|
||||
and "V2" not in v):
|
||||
saved_voice_name_index = i
|
||||
# 确保保存的音色在可用列表中
|
||||
if saved_voice_name not in friendly_names:
|
||||
# 选择与UI语言匹配的第一个语音
|
||||
for v in edge_voices:
|
||||
if v.lower().startswith(st.session_state.get("ui_language", "zh-CN").lower()):
|
||||
saved_voice_name = v
|
||||
break
|
||||
else:
|
||||
# 如果没找到匹配的,使用第一个
|
||||
saved_voice_name = edge_voices[0] if edge_voices else ""
|
||||
|
||||
# 语音选择下拉框
|
||||
# 音色选择下拉框(Edge TTS音色相对较少,保留下拉框)
|
||||
selected_friendly_name = st.selectbox(
|
||||
tr("Speech Synthesis"),
|
||||
"音色选择",
|
||||
options=list(friendly_names.values()),
|
||||
index=saved_voice_name_index,
|
||||
index=list(friendly_names.keys()).index(saved_voice_name) if saved_voice_name in friendly_names else 0,
|
||||
help="选择Edge TTS音色"
|
||||
)
|
||||
|
||||
# 获取实际的语音名称
|
||||
@ -60,22 +172,323 @@ def render_tts_settings(tr):
|
||||
list(friendly_names.values()).index(selected_friendly_name)
|
||||
]
|
||||
|
||||
# 保存设置
|
||||
config.ui["voice_name"] = voice_name
|
||||
# 显示音色信息
|
||||
with st.expander("💡 Edge TTS 音色说明", expanded=False):
|
||||
st.write("**中文音色:**")
|
||||
zh_voices = [v for v in edge_voices if v.startswith("zh-CN")]
|
||||
for v in zh_voices:
|
||||
gender = "女声" if "Female" in v else "男声"
|
||||
name = v.replace("-Female", "").replace("-Male", "").replace("zh-CN-", "").replace("Neural", "")
|
||||
st.write(f"• {name} ({gender})")
|
||||
|
||||
# Azure V2语音特殊处理
|
||||
if voice.is_azure_v2_voice(voice_name):
|
||||
render_azure_v2_settings(tr)
|
||||
st.write("")
|
||||
st.write("**英文音色:**")
|
||||
en_voices = [v for v in edge_voices if v.startswith("en-US")][:5] # 只显示前5个
|
||||
for v in en_voices:
|
||||
gender = "女声" if "Female" in v else "男声"
|
||||
name = v.replace("-Female", "").replace("-Male", "").replace("en-US-", "").replace("Neural", "")
|
||||
st.write(f"• {name} ({gender})")
|
||||
|
||||
# 语音参数设置
|
||||
render_voice_parameters(tr)
|
||||
if len([v for v in edge_voices if v.startswith("en-US")]) > 5:
|
||||
st.write("• ... 更多英文音色")
|
||||
|
||||
# 试听按钮
|
||||
render_voice_preview(tr, voice_name)
|
||||
config.ui["edge_voice_name"] = voice_name
|
||||
config.ui["voice_name"] = voice_name # 兼容性
|
||||
|
||||
# 音量调节
|
||||
voice_volume = st.slider(
|
||||
"音量调节",
|
||||
min_value=0,
|
||||
max_value=100,
|
||||
value=int(config.ui.get("edge_volume", 80)),
|
||||
step=1,
|
||||
help="调节语音音量 (0-100)"
|
||||
)
|
||||
config.ui["edge_volume"] = voice_volume
|
||||
st.session_state['voice_volume'] = voice_volume / 100.0
|
||||
|
||||
# 语速调节
|
||||
voice_rate = st.slider(
|
||||
"语速调节",
|
||||
min_value=0.5,
|
||||
max_value=2.0,
|
||||
value=config.ui.get("edge_rate", 1.0),
|
||||
step=0.1,
|
||||
help="调节语音速度 (0.5-2.0倍速)"
|
||||
)
|
||||
config.ui["edge_rate"] = voice_rate
|
||||
st.session_state['voice_rate'] = voice_rate
|
||||
|
||||
# 语调调节
|
||||
voice_pitch = st.slider(
|
||||
"语调调节",
|
||||
min_value=-50,
|
||||
max_value=50,
|
||||
value=int(config.ui.get("edge_pitch", 0)),
|
||||
step=5,
|
||||
help="调节语音音调 (-50%到+50%)"
|
||||
)
|
||||
config.ui["edge_pitch"] = voice_pitch
|
||||
# 转换为比例值
|
||||
st.session_state['voice_pitch'] = 1.0 + (voice_pitch / 100.0)
|
||||
|
||||
|
||||
def render_azure_speech_settings(tr):
|
||||
"""渲染 Azure Speech Services 引擎设置"""
|
||||
# 服务区域配置
|
||||
azure_speech_region = st.text_input(
|
||||
"服务区域",
|
||||
value=config.azure.get("speech_region", ""),
|
||||
placeholder="例如:eastus",
|
||||
help="Azure Speech Services 服务区域,如:eastus, westus2, eastasia 等"
|
||||
)
|
||||
|
||||
# API Key配置
|
||||
azure_speech_key = st.text_input(
|
||||
"API Key",
|
||||
value=config.azure.get("speech_key", ""),
|
||||
type="password",
|
||||
help="Azure Speech Services API 密钥"
|
||||
)
|
||||
|
||||
# 保存Azure配置
|
||||
config.azure["speech_region"] = azure_speech_region
|
||||
config.azure["speech_key"] = azure_speech_key
|
||||
|
||||
# 音色名称输入框
|
||||
saved_voice_name = config.ui.get("azure_voice_name", "zh-CN-XiaoxiaoMultilingualNeural")
|
||||
|
||||
# 音色名称输入
|
||||
voice_name = st.text_input(
|
||||
"音色名称",
|
||||
value=saved_voice_name,
|
||||
help="输入Azure Speech Services音色名称,直接使用官方音色名称即可。例如:zh-CN-YunzeNeural",
|
||||
placeholder="zh-CN-YunzeNeural"
|
||||
)
|
||||
|
||||
# 显示常用音色示例
|
||||
with st.expander("💡 常用音色参考", expanded=False):
|
||||
st.write("**中文音色:**")
|
||||
st.write("• zh-CN-XiaoxiaoMultilingualNeural (女声,多语言)")
|
||||
st.write("• zh-CN-YunzeNeural (男声)")
|
||||
st.write("• zh-CN-YunxiNeural (男声)")
|
||||
st.write("• zh-CN-XiaochenNeural (女声)")
|
||||
st.write("")
|
||||
st.write("**英文音色:**")
|
||||
st.write("• en-US-AndrewMultilingualNeural (男声,多语言)")
|
||||
st.write("• en-US-AvaMultilingualNeural (女声,多语言)")
|
||||
st.write("• en-US-BrianMultilingualNeural (男声,多语言)")
|
||||
st.write("• en-US-EmmaMultilingualNeural (女声,多语言)")
|
||||
st.write("")
|
||||
st.info("💡 更多音色请参考 [Azure Speech Services 官方文档](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support)")
|
||||
|
||||
# 快速选择按钮
|
||||
st.write("**快速选择:**")
|
||||
cols = st.columns(3)
|
||||
with cols[0]:
|
||||
if st.button("中文女声", help="zh-CN-XiaoxiaoMultilingualNeural"):
|
||||
voice_name = "zh-CN-XiaoxiaoMultilingualNeural"
|
||||
st.rerun()
|
||||
with cols[1]:
|
||||
if st.button("中文男声", help="zh-CN-YunzeNeural"):
|
||||
voice_name = "zh-CN-YunzeNeural"
|
||||
st.rerun()
|
||||
with cols[2]:
|
||||
if st.button("英文女声", help="en-US-AvaMultilingualNeural"):
|
||||
voice_name = "en-US-AvaMultilingualNeural"
|
||||
st.rerun()
|
||||
|
||||
# 验证音色名称并显示状态
|
||||
if voice_name.strip():
|
||||
# 检查是否为有效的Azure音色格式
|
||||
if is_valid_azure_voice_name(voice_name):
|
||||
st.success(f"✅ 音色名称有效: {voice_name}")
|
||||
else:
|
||||
st.warning(f"⚠️ 音色名称格式可能不正确: {voice_name}")
|
||||
st.info("💡 Azure音色名称通常格式为: [语言]-[地区]-[名称]Neural")
|
||||
|
||||
# 保存配置
|
||||
config.ui["azure_voice_name"] = voice_name
|
||||
config.ui["voice_name"] = voice_name # 兼容性
|
||||
|
||||
# 音量调节
|
||||
voice_volume = st.slider(
|
||||
"音量调节",
|
||||
min_value=0,
|
||||
max_value=100,
|
||||
value=int(config.ui.get("azure_volume", 80)),
|
||||
step=1,
|
||||
help="调节语音音量 (0-100)"
|
||||
)
|
||||
config.ui["azure_volume"] = voice_volume
|
||||
st.session_state['voice_volume'] = voice_volume / 100.0
|
||||
|
||||
# 语速调节
|
||||
voice_rate = st.slider(
|
||||
"语速调节",
|
||||
min_value=0.5,
|
||||
max_value=2.0,
|
||||
value=config.ui.get("azure_rate", 1.0),
|
||||
step=0.1,
|
||||
help="调节语音速度 (0.5-2.0倍速)"
|
||||
)
|
||||
config.ui["azure_rate"] = voice_rate
|
||||
st.session_state['voice_rate'] = voice_rate
|
||||
|
||||
# 语调调节
|
||||
voice_pitch = st.slider(
|
||||
"语调调节",
|
||||
min_value=-50,
|
||||
max_value=50,
|
||||
value=int(config.ui.get("azure_pitch", 0)),
|
||||
step=5,
|
||||
help="调节语音音调 (-50%到+50%)"
|
||||
)
|
||||
config.ui["azure_pitch"] = voice_pitch
|
||||
# 转换为比例值
|
||||
st.session_state['voice_pitch'] = 1.0 + (voice_pitch / 100.0)
|
||||
|
||||
# 显示配置状态
|
||||
if azure_speech_region and azure_speech_key:
|
||||
st.success("✅ Azure Speech Services 配置已设置")
|
||||
elif not azure_speech_region:
|
||||
st.warning("⚠️ 请配置服务区域")
|
||||
elif not azure_speech_key:
|
||||
st.warning("⚠️ 请配置 API Key")
|
||||
|
||||
|
||||
def render_soulvoice_engine_settings(tr):
|
||||
"""渲染 SoulVoice 引擎设置"""
|
||||
# API Key 输入
|
||||
api_key = st.text_input(
|
||||
"API Key",
|
||||
value=config.soulvoice.get("api_key", ""),
|
||||
type="password",
|
||||
help="请输入您的 SoulVoice API 密钥"
|
||||
)
|
||||
|
||||
# 音色 URI 输入
|
||||
voice_uri = st.text_input(
|
||||
"音色URI",
|
||||
value=config.soulvoice.get("voice_uri", "speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr"),
|
||||
help="请输入 SoulVoice 音色标识符",
|
||||
placeholder="speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr"
|
||||
)
|
||||
|
||||
# 模型名称选择
|
||||
model_options = [
|
||||
"FunAudioLLM/CosyVoice2-0.5B",
|
||||
"FunAudioLLM/CosyVoice-300M",
|
||||
"FunAudioLLM/CosyVoice-300M-SFT",
|
||||
"FunAudioLLM/CosyVoice-300M-Instruct"
|
||||
]
|
||||
|
||||
saved_model = config.soulvoice.get("model", "FunAudioLLM/CosyVoice2-0.5B")
|
||||
if saved_model not in model_options:
|
||||
model_options.append(saved_model)
|
||||
|
||||
model = st.selectbox(
|
||||
"模型名称",
|
||||
options=model_options,
|
||||
index=model_options.index(saved_model),
|
||||
help="选择使用的 TTS 模型"
|
||||
)
|
||||
|
||||
# 高级设置
|
||||
with st.expander("高级设置", expanded=False):
|
||||
api_url = st.text_input(
|
||||
"API 地址",
|
||||
value=config.soulvoice.get("api_url", "https://tts.scsmtech.cn/tts"),
|
||||
help="SoulVoice API 接口地址"
|
||||
)
|
||||
|
||||
# 保存配置
|
||||
config.soulvoice["api_key"] = api_key
|
||||
config.soulvoice["voice_uri"] = voice_uri
|
||||
config.soulvoice["model"] = model
|
||||
config.soulvoice["api_url"] = api_url
|
||||
|
||||
# 设置兼容性配置
|
||||
if voice_uri:
|
||||
# 确保音色 URI 有正确的前缀
|
||||
if not voice_uri.startswith("soulvoice:") and not voice_uri.startswith("speech:"):
|
||||
voice_name = f"soulvoice:{voice_uri}"
|
||||
else:
|
||||
voice_name = voice_uri if voice_uri.startswith("soulvoice:") else f"soulvoice:{voice_uri}"
|
||||
config.ui["voice_name"] = voice_name
|
||||
|
||||
# 显示配置状态
|
||||
if api_key and voice_uri:
|
||||
st.success("✅ SoulVoice 配置已设置")
|
||||
elif not api_key:
|
||||
st.warning("⚠️ 请配置 SoulVoice API Key")
|
||||
elif not voice_uri:
|
||||
st.warning("⚠️ 请配置音色 URI")
|
||||
|
||||
|
||||
def render_voice_preview_new(tr, selected_engine):
|
||||
"""渲染新的语音试听功能"""
|
||||
if st.button("🎵 试听语音合成", use_container_width=True):
|
||||
play_content = "感谢关注 NarratoAI,有任何问题或建议,可以关注微信公众号,求助或讨论"
|
||||
|
||||
# 根据选择的引擎获取对应的语音配置
|
||||
voice_name = ""
|
||||
voice_rate = 1.0
|
||||
voice_pitch = 1.0
|
||||
|
||||
if selected_engine == "edge_tts":
|
||||
voice_name = config.ui.get("edge_voice_name", "zh-CN-XiaoyiNeural-Female")
|
||||
voice_rate = config.ui.get("edge_rate", 1.0)
|
||||
voice_pitch = 1.0 + (config.ui.get("edge_pitch", 0) / 100.0)
|
||||
elif selected_engine == "azure_speech":
|
||||
voice_name = config.ui.get("azure_voice_name", "zh-CN-XiaoxiaoMultilingualNeural")
|
||||
voice_rate = config.ui.get("azure_rate", 1.0)
|
||||
voice_pitch = 1.0 + (config.ui.get("azure_pitch", 0) / 100.0)
|
||||
elif selected_engine == "soulvoice":
|
||||
voice_uri = config.soulvoice.get("voice_uri", "")
|
||||
if voice_uri:
|
||||
if not voice_uri.startswith("soulvoice:") and not voice_uri.startswith("speech:"):
|
||||
voice_name = f"soulvoice:{voice_uri}"
|
||||
else:
|
||||
voice_name = voice_uri if voice_uri.startswith("soulvoice:") else f"soulvoice:{voice_uri}"
|
||||
voice_rate = 1.0 # SoulVoice 使用默认语速
|
||||
voice_pitch = 1.0 # SoulVoice 不支持音调调节
|
||||
|
||||
if not voice_name:
|
||||
st.error("请先配置语音设置")
|
||||
return
|
||||
|
||||
with st.spinner("正在合成语音..."):
|
||||
temp_dir = utils.storage_dir("temp", create=True)
|
||||
audio_file = os.path.join(temp_dir, f"tmp-voice-{str(uuid4())}.mp3")
|
||||
|
||||
sub_maker = voice.tts(
|
||||
text=play_content,
|
||||
voice_name=voice_name,
|
||||
voice_rate=voice_rate,
|
||||
voice_pitch=voice_pitch,
|
||||
voice_file=audio_file,
|
||||
)
|
||||
|
||||
if sub_maker and os.path.exists(audio_file):
|
||||
st.success("✅ 语音合成成功!")
|
||||
|
||||
# 播放音频
|
||||
with open(audio_file, 'rb') as audio_file_obj:
|
||||
audio_bytes = audio_file_obj.read()
|
||||
st.audio(audio_bytes, format='audio/mp3')
|
||||
|
||||
# 清理临时文件
|
||||
try:
|
||||
os.remove(audio_file)
|
||||
except:
|
||||
pass
|
||||
else:
|
||||
st.error("❌ 语音合成失败,请检查配置")
|
||||
|
||||
|
||||
def render_azure_v2_settings(tr):
|
||||
"""渲染Azure V2语音设置"""
|
||||
"""渲染Azure V2语音设置(保留兼容性)"""
|
||||
saved_azure_speech_region = config.azure.get("speech_region", "")
|
||||
saved_azure_speech_key = config.azure.get("speech_key", "")
|
||||
|
||||
@ -93,8 +506,60 @@ def render_azure_v2_settings(tr):
|
||||
config.azure["speech_key"] = azure_speech_key
|
||||
|
||||
|
||||
def render_voice_parameters(tr):
|
||||
"""渲染语音参数设置"""
|
||||
def render_soulvoice_settings(tr):
|
||||
"""渲染 SoulVoice 语音设置(保留兼容性)"""
|
||||
saved_api_key = config.soulvoice.get("api_key", "")
|
||||
saved_api_url = config.soulvoice.get("api_url", "https://tts.scsmtech.cn/tts")
|
||||
saved_model = config.soulvoice.get("model", "FunAudioLLM/CosyVoice2-0.5B")
|
||||
saved_voice_uri = config.soulvoice.get("voice_uri", "speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr")
|
||||
|
||||
# API Key 输入
|
||||
api_key = st.text_input(
|
||||
"SoulVoice API Key",
|
||||
value=saved_api_key,
|
||||
type="password",
|
||||
help="请输入您的 SoulVoice API 密钥"
|
||||
)
|
||||
|
||||
# 音色 URI 输入
|
||||
voice_uri = st.text_input(
|
||||
"音色 URI",
|
||||
value=saved_voice_uri,
|
||||
help="请输入 SoulVoice 音色标识符,格式如:speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr",
|
||||
placeholder="speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr"
|
||||
)
|
||||
|
||||
# API URL 输入(可选)
|
||||
with st.expander("高级设置", expanded=False):
|
||||
api_url = st.text_input(
|
||||
"API 地址",
|
||||
value=saved_api_url,
|
||||
help="SoulVoice API 接口地址"
|
||||
)
|
||||
|
||||
model = st.text_input(
|
||||
"模型名称",
|
||||
value=saved_model,
|
||||
help="使用的 TTS 模型"
|
||||
)
|
||||
|
||||
# 保存配置
|
||||
config.soulvoice["api_key"] = api_key
|
||||
config.soulvoice["voice_uri"] = voice_uri
|
||||
config.soulvoice["api_url"] = api_url
|
||||
config.soulvoice["model"] = model
|
||||
|
||||
# 显示配置状态
|
||||
if api_key and voice_uri:
|
||||
st.success("✅ SoulVoice 配置已设置")
|
||||
elif not api_key:
|
||||
st.warning("⚠️ 请配置 SoulVoice API Key")
|
||||
elif not voice_uri:
|
||||
st.warning("⚠️ 请配置音色 URI")
|
||||
|
||||
|
||||
def render_voice_parameters(tr, voice_name):
|
||||
"""渲染语音参数设置(保留兼容性)"""
|
||||
# 音量 - 使用统一的默认值
|
||||
voice_volume = st.slider(
|
||||
tr("Speech Volume"),
|
||||
@ -106,22 +571,41 @@ def render_voice_parameters(tr):
|
||||
)
|
||||
st.session_state['voice_volume'] = voice_volume
|
||||
|
||||
# 检查是否为 SoulVoice 引擎
|
||||
is_soulvoice = voice.is_soulvoice_voice(voice_name)
|
||||
|
||||
# 语速
|
||||
voice_rate = st.selectbox(
|
||||
tr("Speech Rate"),
|
||||
options=[0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.5, 1.8, 2.0],
|
||||
index=2,
|
||||
)
|
||||
if is_soulvoice:
|
||||
# SoulVoice 支持更精细的语速控制
|
||||
voice_rate = st.slider(
|
||||
tr("Speech Rate"),
|
||||
min_value=0.5,
|
||||
max_value=2.0,
|
||||
value=1.0,
|
||||
step=0.1,
|
||||
help="SoulVoice 语音速度控制"
|
||||
)
|
||||
else:
|
||||
# Azure TTS 使用预设选项
|
||||
voice_rate = st.selectbox(
|
||||
tr("Speech Rate"),
|
||||
options=[0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.5, 1.8, 2.0],
|
||||
index=2,
|
||||
)
|
||||
st.session_state['voice_rate'] = voice_rate
|
||||
|
||||
# 音调
|
||||
voice_pitch = st.selectbox(
|
||||
tr("Speech Pitch"),
|
||||
options=[0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.5, 1.8, 2.0],
|
||||
index=2,
|
||||
)
|
||||
st.session_state['voice_pitch'] = voice_pitch
|
||||
# 音调 - SoulVoice 不支持音调调节
|
||||
if not is_soulvoice:
|
||||
voice_pitch = st.selectbox(
|
||||
tr("Speech Pitch"),
|
||||
options=[0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.5, 1.8, 2.0],
|
||||
index=2,
|
||||
)
|
||||
st.session_state['voice_pitch'] = voice_pitch
|
||||
else:
|
||||
# SoulVoice 不支持音调调节,设置默认值
|
||||
st.session_state['voice_pitch'] = 1.0
|
||||
st.info("ℹ️ SoulVoice 引擎不支持音调调节")
|
||||
|
||||
|
||||
def render_voice_preview(tr, voice_name):
|
||||
@ -157,9 +641,12 @@ def render_voice_preview(tr, voice_name):
|
||||
)
|
||||
|
||||
if sub_maker and os.path.exists(audio_file):
|
||||
st.success(tr("Voice synthesis successful"))
|
||||
st.audio(audio_file, format="audio/mp3")
|
||||
if os.path.exists(audio_file):
|
||||
os.remove(audio_file)
|
||||
else:
|
||||
st.error(tr("Voice synthesis failed"))
|
||||
|
||||
|
||||
def render_bgm_settings(tr):
|
||||
|
||||
@ -1,296 +0,0 @@
|
||||
import os
|
||||
import time
|
||||
import streamlit as st
|
||||
from loguru import logger
|
||||
from typing import List, Dict
|
||||
from dataclasses import dataclass
|
||||
from streamlit.runtime.uploaded_file_manager import UploadedFile
|
||||
|
||||
from webui.utils.merge_video import merge_videos_and_subtitles
|
||||
from app.utils.utils import video_dir, srt_dir
|
||||
|
||||
# 定义临时目录路径
|
||||
TEMP_MERGE_DIR = os.path.join("storage", "temp", "merge")
|
||||
|
||||
# 确保临时目录存在
|
||||
os.makedirs(TEMP_MERGE_DIR, exist_ok=True)
|
||||
|
||||
|
||||
@dataclass
|
||||
class VideoSubtitlePair:
|
||||
video_file: UploadedFile | None
|
||||
subtitle_file: str | None
|
||||
base_name: str
|
||||
order: int = 0
|
||||
|
||||
|
||||
def save_uploaded_file(uploaded_file: UploadedFile, target_dir: str) -> str:
|
||||
"""Save uploaded file to target directory and return the file path"""
|
||||
file_path = os.path.join(target_dir, uploaded_file.name)
|
||||
# 如果文件已存在,先删除它
|
||||
if os.path.exists(file_path):
|
||||
os.remove(file_path)
|
||||
with open(file_path, "wb") as f:
|
||||
f.write(uploaded_file.getvalue())
|
||||
return file_path
|
||||
|
||||
|
||||
def clean_temp_dir():
|
||||
"""清空临时目录"""
|
||||
if os.path.exists(TEMP_MERGE_DIR):
|
||||
for file in os.listdir(TEMP_MERGE_DIR):
|
||||
file_path = os.path.join(TEMP_MERGE_DIR, file)
|
||||
try:
|
||||
if os.path.isfile(file_path):
|
||||
os.unlink(file_path)
|
||||
except Exception as e:
|
||||
logger.error(f"清理临时文件失败: {str(e)}")
|
||||
|
||||
|
||||
def group_files(files: List[UploadedFile]) -> Dict[str, VideoSubtitlePair]:
|
||||
"""Group uploaded files by their base names"""
|
||||
pairs = {}
|
||||
order_counter = 0
|
||||
|
||||
# 首先处理所有视频文件
|
||||
for file in files:
|
||||
base_name = os.path.splitext(file.name)[0]
|
||||
ext = os.path.splitext(file.name)[1].lower()
|
||||
|
||||
if ext == ".mp4":
|
||||
if base_name not in pairs:
|
||||
pairs[base_name] = VideoSubtitlePair(None, None, base_name, order_counter)
|
||||
order_counter += 1
|
||||
pairs[base_name].video_file = file
|
||||
# 保存视频文件到临时目录
|
||||
video_path = save_uploaded_file(file, TEMP_MERGE_DIR)
|
||||
|
||||
# 然后处理所有字幕文件
|
||||
for file in files:
|
||||
base_name = os.path.splitext(file.name)[0]
|
||||
ext = os.path.splitext(file.name)[1].lower()
|
||||
|
||||
if ext == ".srt":
|
||||
# 即使没有对应视频也保存字幕文件
|
||||
subtitle_path = os.path.join(TEMP_MERGE_DIR, f"{base_name}.srt")
|
||||
save_uploaded_file(file, TEMP_MERGE_DIR)
|
||||
|
||||
if base_name in pairs: # 如果有对应的视频
|
||||
pairs[base_name].subtitle_file = subtitle_path
|
||||
|
||||
return pairs
|
||||
|
||||
|
||||
def render_merge_settings(tr):
|
||||
"""Render the merge settings section"""
|
||||
with st.expander(tr("Video Subtitle Merge"), expanded=False):
|
||||
# 上传文件区域
|
||||
uploaded_files = st.file_uploader(
|
||||
tr("Upload Video and Subtitle Files"),
|
||||
type=["mp4", "srt"],
|
||||
accept_multiple_files=True,
|
||||
key="merge_files"
|
||||
)
|
||||
|
||||
if uploaded_files:
|
||||
all_pairs = group_files(uploaded_files)
|
||||
|
||||
if all_pairs:
|
||||
st.write(tr("All Uploaded Files"))
|
||||
|
||||
# 初始化或更新session state中的排序信息
|
||||
if 'file_orders' not in st.session_state:
|
||||
st.session_state.file_orders = {
|
||||
name: pair.order for name, pair in all_pairs.items()
|
||||
}
|
||||
st.session_state.needs_reorder = False
|
||||
|
||||
# 确保所有新文件都有排序值
|
||||
for name, pair in all_pairs.items():
|
||||
if name not in st.session_state.file_orders:
|
||||
st.session_state.file_orders[name] = pair.order
|
||||
|
||||
# 移除不存在的文件的排序值
|
||||
st.session_state.file_orders = {
|
||||
k: v for k, v in st.session_state.file_orders.items()
|
||||
if k in all_pairs
|
||||
}
|
||||
|
||||
# 按照排序值对文件对进行排序
|
||||
sorted_pairs = sorted(
|
||||
all_pairs.items(),
|
||||
key=lambda x: st.session_state.file_orders[x[0]]
|
||||
)
|
||||
|
||||
# 计算需要多少行来显示所有视频(每行5个)
|
||||
num_pairs = len(sorted_pairs)
|
||||
num_rows = (num_pairs + 4) // 5 # 向上取整,每行5个
|
||||
|
||||
# 遍历每一行
|
||||
for row in range(num_rows):
|
||||
# 创建5列
|
||||
cols = st.columns(5)
|
||||
|
||||
# 在这一行中填充视频(最多5个)
|
||||
for col_idx in range(5):
|
||||
pair_idx = row * 5 + col_idx
|
||||
if pair_idx < num_pairs:
|
||||
base_name, pair = sorted_pairs[pair_idx]
|
||||
with cols[col_idx]:
|
||||
st.caption(base_name)
|
||||
|
||||
# 显示视频预览(如果存在)
|
||||
video_path = os.path.join(TEMP_MERGE_DIR, f"{base_name}.mp4")
|
||||
if os.path.exists(video_path):
|
||||
st.video(video_path)
|
||||
else:
|
||||
st.warning(tr("Missing Video"))
|
||||
|
||||
# 显示字幕预览(如果存在)
|
||||
subtitle_path = os.path.join(TEMP_MERGE_DIR, f"{base_name}.srt")
|
||||
if os.path.exists(subtitle_path):
|
||||
with open(subtitle_path, 'r', encoding='utf-8') as f:
|
||||
subtitle_content = f.read()
|
||||
st.markdown(tr("Subtitle Preview"))
|
||||
st.text_area(
|
||||
"Subtitle Content",
|
||||
value=subtitle_content,
|
||||
height=100, # 减高度以适应5列布局
|
||||
label_visibility="collapsed",
|
||||
key=f"subtitle_preview_{base_name}"
|
||||
)
|
||||
else:
|
||||
st.warning(tr("Missing Subtitle"))
|
||||
# 如果有视频但没有字幕,显示一键转录按钮
|
||||
# if os.path.exists(video_path):
|
||||
# if st.button(tr("One-Click Transcribe"), key=f"transcribe_{base_name}"):
|
||||
# with st.spinner(tr("Transcribing...")):
|
||||
# try:
|
||||
# # 生成字幕文件
|
||||
# result = extract_audio_and_create_subtitle(video_path, subtitle_path)
|
||||
# if result:
|
||||
# # 读取生成的字幕文件内容并显示预览
|
||||
# with open(subtitle_path, 'r', encoding='utf-8') as f:
|
||||
# subtitle_content = f.read()
|
||||
# st.markdown(tr("Subtitle Preview"))
|
||||
# st.text_area(
|
||||
# "Subtitle Content",
|
||||
# value=subtitle_content,
|
||||
# height=150,
|
||||
# label_visibility="collapsed",
|
||||
# key=f"subtitle_preview_transcribed_{base_name}"
|
||||
# )
|
||||
# st.success(tr("Transcription Complete!"))
|
||||
# # 更新pair的字幕文件路径
|
||||
# pair.subtitle_file = subtitle_path
|
||||
# else:
|
||||
# st.error(tr("Transcription Failed. Please try again."))
|
||||
# except Exception as e:
|
||||
# error_message = str(e)
|
||||
# logger.error(traceback.format_exc())
|
||||
# if "rate limit exceeded" in error_message.lower():
|
||||
# st.error(tr("API rate limit exceeded. Please wait about an hour and try again."))
|
||||
# elif "resource_exhausted" in error_message.lower():
|
||||
# st.error(tr("Resources exhausted. Please try again later."))
|
||||
# else:
|
||||
# st.error(f"{tr('Transcription Failed')}: {str(e)}")
|
||||
|
||||
# 排序输入框
|
||||
order = st.number_input(
|
||||
tr("Order"),
|
||||
min_value=0,
|
||||
value=st.session_state.file_orders[base_name],
|
||||
key=f"order_{base_name}",
|
||||
on_change=lambda: setattr(st.session_state, 'needs_reorder', True)
|
||||
)
|
||||
if order != st.session_state.file_orders[base_name]:
|
||||
st.session_state.file_orders[base_name] = order
|
||||
st.session_state.needs_reorder = True
|
||||
|
||||
# 如果需要重新排序,重新加载页面
|
||||
if st.session_state.needs_reorder:
|
||||
st.session_state.needs_reorder = False
|
||||
st.rerun()
|
||||
|
||||
# 找出有完整视频和字幕的文件对
|
||||
complete_pairs = {
|
||||
k: v for k, v in all_pairs.items()
|
||||
if os.path.exists(os.path.join(TEMP_MERGE_DIR, f"{k}.mp4")) and
|
||||
os.path.exists(os.path.join(TEMP_MERGE_DIR, f"{k}.srt"))
|
||||
}
|
||||
|
||||
# 合并按钮和结果显示
|
||||
cols = st.columns([1, 2, 1])
|
||||
with cols[0]:
|
||||
st.write(f"{tr('Mergeable Files')}: {len(complete_pairs)}")
|
||||
|
||||
merge_videos_result = None
|
||||
|
||||
with cols[1]:
|
||||
if st.button(tr("Merge All Files"), type="primary", use_container_width=True):
|
||||
try:
|
||||
# 获取排序后的完整文件对
|
||||
sorted_complete_pairs = sorted(
|
||||
[(k, v) for k, v in complete_pairs.items()],
|
||||
key=lambda x: st.session_state.file_orders[x[0]]
|
||||
)
|
||||
|
||||
video_paths = []
|
||||
subtitle_paths = []
|
||||
for base_name, _ in sorted_complete_pairs:
|
||||
video_paths.append(os.path.join(TEMP_MERGE_DIR, f"{base_name}.mp4"))
|
||||
subtitle_paths.append(os.path.join(TEMP_MERGE_DIR, f"{base_name}.srt"))
|
||||
|
||||
# 获取输出文件路径
|
||||
output_video = os.path.join(video_dir(), f"merged_video_{time.strftime('%M%S')}.mp4")
|
||||
output_subtitle = os.path.join(srt_dir(), f"merged_subtitle_{time.strftime('%M%S')}.srt")
|
||||
|
||||
with st.spinner(tr("Merging files...")):
|
||||
# 合并文件
|
||||
merge_videos_and_subtitles(
|
||||
video_paths,
|
||||
subtitle_paths,
|
||||
output_video,
|
||||
output_subtitle
|
||||
)
|
||||
|
||||
success = True
|
||||
error_msg = ""
|
||||
|
||||
# 检查输出文件是否成功生成
|
||||
if not os.path.exists(output_video):
|
||||
success = False
|
||||
error_msg += tr("Failed to generate merged video. ")
|
||||
if not os.path.exists(output_subtitle):
|
||||
success = False
|
||||
error_msg += tr("Failed to generate merged subtitle. ")
|
||||
|
||||
if success:
|
||||
# 显示成功消息
|
||||
st.success(tr("Merge completed!"))
|
||||
merge_videos_result = (output_video, output_subtitle)
|
||||
# 清理临时目录
|
||||
clean_temp_dir()
|
||||
else:
|
||||
st.error(error_msg)
|
||||
|
||||
except Exception as e:
|
||||
error_message = str(e)
|
||||
if "moviepy" in error_message.lower():
|
||||
st.error(tr("Error processing video files. Please check if the videos are valid MP4 files."))
|
||||
# elif "pysrt" in error_message.lower():
|
||||
# st.error(tr("Error processing subtitle files. Please check if the subtitles are valid SRT files."))
|
||||
else:
|
||||
st.error(f"{tr('Error during merge')}: {error_message}")
|
||||
|
||||
# 合并结果预览放在合并按钮下方
|
||||
if merge_videos_result:
|
||||
st.markdown(f"<h3 style='text-align: center'>{tr('Merge Result Preview')}</h3>", unsafe_allow_html=True)
|
||||
# 使用列布局使视频居中
|
||||
col1, col2, col3 = st.columns([1,2,1])
|
||||
with col2:
|
||||
st.video(merge_videos_result[0])
|
||||
st.code(f"{tr('Video Path')}: {merge_videos_result[0]}")
|
||||
st.code(f"{tr('Subtitle Path')}: {merge_videos_result[1]}")
|
||||
else:
|
||||
st.warning(tr("No Files Found"))
|
||||
@ -1,88 +0,0 @@
|
||||
import streamlit as st
|
||||
import os
|
||||
from loguru import logger
|
||||
|
||||
|
||||
def render_review_panel(tr):
|
||||
"""渲染视频审查面板"""
|
||||
with st.expander(tr("Video Check"), expanded=False):
|
||||
try:
|
||||
video_list = st.session_state.get('video_clip_json', [])
|
||||
subclip_videos = st.session_state.get('subclip_videos', {})
|
||||
except KeyError:
|
||||
video_list = []
|
||||
subclip_videos = {}
|
||||
|
||||
# 计算列数和行数
|
||||
num_videos = len(video_list)
|
||||
cols_per_row = 3
|
||||
rows = (num_videos + cols_per_row - 1) // cols_per_row # 向上取整计算行数
|
||||
|
||||
# 使用容器展示视频
|
||||
for row in range(rows):
|
||||
cols = st.columns(cols_per_row)
|
||||
for col in range(cols_per_row):
|
||||
index = row * cols_per_row + col
|
||||
if index < num_videos:
|
||||
with cols[col]:
|
||||
render_video_item(tr, video_list, subclip_videos, index)
|
||||
|
||||
|
||||
def render_video_item(tr, video_list, subclip_videos, index):
|
||||
"""渲染单个视频项"""
|
||||
video_script = video_list[index]
|
||||
|
||||
# 显示时间戳
|
||||
timestamp = video_script.get('_id', '')
|
||||
st.text_area(
|
||||
tr("Timestamp"),
|
||||
value=timestamp,
|
||||
height=70,
|
||||
disabled=True,
|
||||
key=f"timestamp_{index}"
|
||||
)
|
||||
|
||||
# 显示视频播放器
|
||||
video_path = subclip_videos.get(timestamp)
|
||||
if video_path and os.path.exists(video_path):
|
||||
try:
|
||||
st.video(video_path)
|
||||
except Exception as e:
|
||||
logger.error(f"加载视频失败 {video_path}: {e}")
|
||||
st.error(f"无法加载视频: {os.path.basename(video_path)}")
|
||||
else:
|
||||
st.warning(tr("视频文件未找到"))
|
||||
|
||||
# 显示画面描述
|
||||
st.text_area(
|
||||
tr("Picture Description"),
|
||||
value=video_script.get('picture', ''),
|
||||
height=150,
|
||||
disabled=True,
|
||||
key=f"picture_{index}"
|
||||
)
|
||||
|
||||
# 显示旁白文本
|
||||
narration = st.text_area(
|
||||
tr("Narration"),
|
||||
value=video_script.get('narration', ''),
|
||||
height=150,
|
||||
key=f"narration_{index}"
|
||||
)
|
||||
# 保存修改后的旁白文本
|
||||
if narration != video_script.get('narration', ''):
|
||||
video_script['narration'] = narration
|
||||
st.session_state['video_clip_json'] = video_list
|
||||
|
||||
# 显示剪辑模式
|
||||
ost = st.selectbox(
|
||||
tr("Clip Mode"),
|
||||
options=range(0, 3),
|
||||
index=video_script.get('OST', 0),
|
||||
key=f"ost_{index}",
|
||||
help=tr("0: Keep the audio only, 1: Keep the original sound only, 2: Keep the original sound and audio")
|
||||
)
|
||||
# 保存修改后的剪辑模式
|
||||
if ost != video_script.get('OST', 0):
|
||||
video_script['OST'] = ost
|
||||
st.session_state['video_clip_json'] = video_list
|
||||
@ -333,38 +333,12 @@ def render_script_buttons(tr, params):
video_clip_json_details = st.text_area(
tr("Video Script"),
value=json.dumps(st.session_state.get('video_clip_json', []), indent=2, ensure_ascii=False),
height=180
height=500
)

# 操作按钮行
button_cols = st.columns(3)
with button_cols[0]:
if st.button(tr("Check Format"), key="check_format", use_container_width=True):
check_script_format(tr, video_clip_json_details)

with button_cols[1]:
if st.button(tr("Save Script"), key="save_script", use_container_width=True):
save_script(tr, video_clip_json_details)

with button_cols[2]:
script_valid = st.session_state.get('script_format_valid', False)
if st.button(tr("Crop Video"), key="crop_video", disabled=not script_valid, use_container_width=True):
crop_video(tr, params)


def check_script_format(tr, script_content):
"""检查脚本格式"""
try:
result = check_script.check_format(script_content)
if result.get('success'):
st.success(tr("Script format check passed"))
st.session_state['script_format_valid'] = True
else:
st.error(f"{tr('Script format check failed')}: {result.get('message')}")
st.session_state['script_format_valid'] = False
except Exception as e:
st.error(f"{tr('Script format check error')}: {str(e)}")
st.session_state['script_format_valid'] = False
# 操作按钮行 - 合并格式检查和保存功能
if st.button(tr("Save Script"), key="save_script", use_container_width=True):
save_script_with_validation(tr, video_clip_json_details)


def load_script(tr, script_path):
@ -381,12 +355,52 @@ def load_script(tr, script_path):
|
||||
st.error(f"{tr('Failed to load script')}: {str(e)}")
|
||||
|
||||
|
||||
def save_script(tr, video_clip_json_details):
|
||||
"""保存视频脚本"""
|
||||
def save_script_with_validation(tr, video_clip_json_details):
|
||||
"""保存视频脚本(包含格式验证)"""
|
||||
if not video_clip_json_details:
|
||||
st.error(tr("请输入视频脚本"))
|
||||
st.stop()
|
||||
|
||||
# 第一步:格式验证
|
||||
with st.spinner("正在验证脚本格式..."):
|
||||
try:
|
||||
result = check_script.check_format(video_clip_json_details)
|
||||
if not result.get('success'):
|
||||
# 格式验证失败,显示详细错误信息
|
||||
error_message = result.get('message', '未知错误')
|
||||
error_details = result.get('details', '')
|
||||
|
||||
st.error(f"**脚本格式验证失败**")
|
||||
st.error(f"**错误信息:** {error_message}")
|
||||
if error_details:
|
||||
st.error(f"**详细说明:** {error_details}")
|
||||
|
||||
# 显示正确格式示例
|
||||
st.info("**正确的脚本格式示例:**")
|
||||
example_script = [
|
||||
{
|
||||
"_id": 1,
|
||||
"timestamp": "00:00:00,600-00:00:07,559",
|
||||
"picture": "工地上,蔡晓艳奋力救人,场面混乱",
|
||||
"narration": "灾后重建,工地上险象环生!泼辣女工蔡晓艳挺身而出,救人第一!",
|
||||
"OST": 0
|
||||
},
|
||||
{
|
||||
"_id": 2,
|
||||
"timestamp": "00:00:08,240-00:00:12,359",
|
||||
"picture": "领导视察,蔡晓艳不屑一顾",
|
||||
"narration": "播放原片4",
|
||||
"OST": 1
|
||||
}
|
||||
]
|
||||
st.code(json.dumps(example_script, ensure_ascii=False, indent=2), language='json')
|
||||
st.stop()
|
||||
|
||||
except Exception as e:
|
||||
st.error(f"格式验证过程中发生错误: {str(e)}")
|
||||
st.stop()
|
||||
|
||||
# 第二步:保存脚本
|
||||
with st.spinner(tr("Save Script")):
|
||||
script_dir = utils.script_dir()
|
||||
timestamp = time.strftime("%Y-%m%d-%H%M%S")
|
||||
@ -403,7 +417,7 @@ def save_script(tr, video_clip_json_details):
|
||||
config.app["video_clip_json_path"] = save_path
|
||||
|
||||
# 显示成功消息
|
||||
st.success(tr("Script saved successfully"))
|
||||
st.success("✅ 脚本格式验证通过,保存成功!")
|
||||
|
||||
# 强制重新加载页面更新选择框
|
||||
time.sleep(0.5) # 给一点时间让用户看到成功消息
|
||||
@ -414,26 +428,7 @@ def save_script(tr, video_clip_json_details):
|
||||
st.stop()
|
||||
|
||||
|
||||
def crop_video(tr, params):
|
||||
"""裁剪视频"""
|
||||
progress_bar = st.progress(0)
|
||||
status_text = st.empty()
|
||||
|
||||
def update_progress(progress):
|
||||
progress_bar.progress(progress)
|
||||
status_text.text(f"剪辑进度: {progress}%")
|
||||
|
||||
try:
|
||||
utils.cut_video(params, update_progress)
|
||||
time.sleep(0.5)
|
||||
progress_bar.progress(100)
|
||||
st.success("视频剪辑成功完成!")
|
||||
except Exception as e:
|
||||
st.error(f"剪辑过程中发生错误: {str(e)}")
|
||||
finally:
|
||||
time.sleep(1)
|
||||
progress_bar.empty()
|
||||
status_text.empty()
|
||||
# crop_video函数已移除 - 现在使用统一裁剪策略,不再需要预裁剪步骤
|
||||
|
||||
|
||||
def get_script_params():
|
||||
|
||||
@ -9,14 +9,35 @@ def render_subtitle_panel(tr):
with st.container(border=True):
st.write(tr("Subtitle Settings"))

# 启用字幕选项
enable_subtitles = st.checkbox(tr("Enable Subtitles"), value=True)
st.session_state['subtitle_enabled'] = enable_subtitles
# 检查是否选择了 SoulVoice 引擎
from app.services import voice
current_voice = st.session_state.get('voice_name', '')
is_soulvoice = voice.is_soulvoice_voice(current_voice)

if enable_subtitles:
render_font_settings(tr)
render_position_settings(tr)
render_style_settings(tr)
if is_soulvoice:
# SoulVoice 引擎时显示禁用提示
st.warning("⚠️ SoulVoice TTS 不支持精确字幕生成")
st.info("💡 建议使用专业剪辑工具(如剪映、PR等)手动添加字幕")

# 强制禁用字幕
st.session_state['subtitle_enabled'] = False

# 显示禁用状态的复选框
st.checkbox(
tr("Enable Subtitles"),
value=False,
disabled=True,
help="SoulVoice 引擎不支持字幕生成,请使用其他 TTS 引擎"
)
else:
# 其他引擎正常显示字幕选项
enable_subtitles = st.checkbox(tr("Enable Subtitles"), value=True)
st.session_state['subtitle_enabled'] = enable_subtitles

if enable_subtitles:
render_font_settings(tr)
render_position_settings(tr)
render_style_settings(tr)


def render_font_settings(tr):
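The subtitle panel above keys off voice.is_soulvoice_voice, which is not part of this diff. A minimal sketch of what such a check could look like, assuming the "soulvoice:" prefix convention used by the audio settings panel (an assumption, not the actual implementation in app/services/voice.py):

def is_soulvoice_voice(voice_name: str) -> bool:
    # Hypothetical: any voice name carrying the soulvoice: prefix is treated as a SoulVoice voice.
    return bool(voice_name) and voice_name.startswith("soulvoice:")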
@ -29,7 +29,7 @@
"Clip Duration": "Maximum Clip Duration (Seconds) (**Not the total length of the video**, refers to the length of each **composite segment**)",
"Number of Videos Generated Simultaneously": "Number of Videos Generated Simultaneously",
"Audio Settings": "**Audio Settings**",
"Speech Synthesis": "Speech Synthesis Voice (:red[**Keep consistent with the script language**. Note: V2 version performs better, but requires an API KEY])",
"Speech Synthesis": "Speech Synthesis Voice (:red[**Keep consistent with the script language**. Note: V2 version performs better, but requires an API KEY; SoulVoice provides high-quality Chinese voices])",
"Speech Region": "Service Region (:red[Required, [Click to Get](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])",
"Speech Key": "API Key (:red[Required, either Key 1 or Key 2 is acceptable [Click to Get](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])",
"Speech Volume": "Speech Volume (1.0 represents 100%)",

@ -11,7 +11,6 @@
|
||||
"Video Theme": "视频主题",
|
||||
"Generation Prompt": "自定义提示词",
|
||||
"Save Script": "保存脚本",
|
||||
"Crop Video": "裁剪视频",
|
||||
"Video File": "视频文件(:blue[1️⃣支持上传视频文件(限制2G) 2️⃣大文件建议直接导入 ./resource/videos 目录])",
|
||||
"Plot Description": "剧情描述 (:blue[可从 https://www.tvmao.com/ 获取])",
|
||||
"Generate Video Keywords": "点击使用AI根据**文案**生成【视频关键】",
|
||||
@ -29,7 +28,7 @@
|
||||
"Clip Duration": "视频片段最大时长(秒)(**不是视频总长度**,是指每个**合成片段**的长度)",
|
||||
"Number of Videos Generated Simultaneously": "同时生成视频数量",
|
||||
"Audio Settings": "**音频设置**",
|
||||
"Speech Synthesis": "朗读声音(:red[**与文案语言保持一致**。注意:V2版效果更好,但是需要API KEY])",
|
||||
"Speech Synthesis": "朗读声音(:red[**与文案语言保持一致**。注意:V2版效果更好,但是需要API KEY;SoulVoice 提供高质量中文语音])",
|
||||
"Speech Region": "服务区域 (:red[必填,[点击获取](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])",
|
||||
"Speech Key": "API Key (:red[必填,密钥1 或 密钥2 均可 [点击获取](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])",
|
||||
"Speech Volume": "朗读音量(1.0表示100%)",
|
||||
@ -82,7 +81,6 @@
|
||||
"TTS Provider": "语音合成提供商",
|
||||
"Hide Log": "隐藏日志",
|
||||
"Upload Local Files": "上传本地文件",
|
||||
"Video Check": "视频审查",
|
||||
"File Uploaded Successfully": "文件上传成功",
|
||||
"timestamp": "时间戳",
|
||||
"Picture description": "图片描述",
|
||||
@ -137,31 +135,6 @@
|
||||
"Script Uploaded Successfully": "脚本上传成功",
|
||||
"Invalid JSON format": "无效的JSON格式",
|
||||
"Upload failed": "上传失败",
|
||||
"Video Subtitle Merge": "**合并视频与字幕**",
|
||||
"Upload Video and Subtitle Files": "上传视频和字幕文件",
|
||||
"Matched File Pairs": "已匹配的文件对",
|
||||
"Merge All Files": "合并所有文件",
|
||||
"Merge Function Not Implemented": "合并功能待实现",
|
||||
"No Matched Pairs Found": "未找到匹配的文件对",
|
||||
"Missing Subtitle": "缺少对应的字幕文件, 请使用其他软件完成字幕转录,比如剪映等",
|
||||
"Missing Video": "缺少对应的视频文件",
|
||||
"All Uploaded Files": "所有上传的文件",
|
||||
"Order": "排序序号",
|
||||
"Reorder": "重新排序",
|
||||
"Merging files...": "正在合并文件...",
|
||||
"Merge completed!": "合并完成!",
|
||||
"Download Merged Video": "下载合并后的视频",
|
||||
"Download Merged Subtitle": "下载合并后的字幕",
|
||||
"Error during merge": "合并过程中出错",
|
||||
"Failed to generate merged video.": "生成合并视频失败。",
|
||||
"Failed to generate merged subtitle.": "生成合并字幕失败。",
|
||||
"Error reading merged video file": "读取合并后的视频文件时出错",
|
||||
"Error reading merged subtitle file": "读取合并后的字幕文件时出错",
|
||||
"Error processing video files. Please check if the videos are valid MP4 files.": "处理视频文件时出错。请检查视频是否为有效的MP4文件。",
|
||||
"Error processing subtitle files. Please check if the subtitles are valid SRT files.": "处理字幕文件时出错。请检查字幕是否为有效的SRT文件。",
|
||||
"Preview Merged Video": "预览合并后的视频",
|
||||
"Video Path": "视频路径",
|
||||
"Subtitle Path": "字幕路径",
|
||||
"Enable Proxy": "启用代理",
|
||||
"QwenVL model is available": "QwenVL 模型可用",
|
||||
"QwenVL model is not available": "QwenVL 模型不可用",
|
||||
@ -184,9 +157,6 @@
|
||||
"API rate limit exceeded. Please wait about an hour and try again.": "API 调用次数已达到限制,请等待约一小时后再试。",
|
||||
"Resources exhausted. Please try again later.": "资源已耗尽,请稍后再试。",
|
||||
"Transcription Failed": "转录失败",
|
||||
"Mergeable Files": "可合并文件数",
|
||||
"Subtitle Content": "字幕内容",
|
||||
"Merge Result Preview": "合并结果预览",
|
||||
"Short Generate": "短剧混剪 (高燃剪辑)",
|
||||
"Generate Short Video Script": "AI生成短剧混剪脚本",
|
||||
"Adjust the volume of the original audio": "调整原始音频的音量",
|
||||
|
||||
@ -1,115 +0,0 @@
"""
Merge video and subtitle files.
"""
import os

import pysrt
from moviepy import VideoFileClip, concatenate_videoclips


def get_video_duration(video_path):
    """Return the video duration in seconds."""
    video = VideoFileClip(video_path)
    duration = video.duration
    video.close()
    return duration


def adjust_subtitle_timing(subtitle_path, time_offset):
    """Shift subtitle timestamps by time_offset seconds."""
    subs = pysrt.open(subtitle_path)

    # Add the time offset to every subtitle item
    for sub in subs:
        sub.start.hours += int(time_offset / 3600)
        sub.start.minutes += int((time_offset % 3600) / 60)
        sub.start.seconds += int(time_offset % 60)
        sub.start.milliseconds += int((time_offset * 1000) % 1000)

        sub.end.hours += int(time_offset / 3600)
        sub.end.minutes += int((time_offset % 3600) / 60)
        sub.end.seconds += int(time_offset % 60)
        sub.end.milliseconds += int((time_offset * 1000) % 1000)

    return subs


def merge_videos_and_subtitles(video_paths, subtitle_paths, output_video_path, output_subtitle_path):
    """Merge video files and their subtitle files."""
    if len(video_paths) != len(subtitle_paths):
        raise ValueError("视频文件数量与字幕文件数量不匹配")

    # 1. Merge the videos
    video_clips = []
    accumulated_duration = 0
    merged_subs = pysrt.SubRipFile()

    try:
        # Process every video/subtitle pair
        for i, (video_path, subtitle_path) in enumerate(zip(video_paths, subtitle_paths)):
            # Load the video
            print(f"处理视频 {i + 1}/{len(video_paths)}: {video_path}")
            video_clip = VideoFileClip(video_path)
            video_clips.append(video_clip)

            # Process the subtitle
            print(f"处理字幕 {i + 1}/{len(subtitle_paths)}: {subtitle_path}")
            if i == 0:
                # The first subtitle file is read as-is
                current_subs = pysrt.open(subtitle_path)
            else:
                # Later subtitle files need their timestamps shifted
                current_subs = adjust_subtitle_timing(subtitle_path, accumulated_duration)

            # Merge the subtitles
            merged_subs.extend(current_subs)

            # Update the accumulated duration
            accumulated_duration += video_clip.duration

        # Skip the merge if the output video already exists
        if not os.path.exists(output_video_path):
            print("合并视频中...")
            final_video = concatenate_videoclips(video_clips)

            # Save the merged video
            print("保存合并后的视频...")
            final_video.write_videofile(output_video_path, audio_codec='aac')

        # Save the merged subtitles
        print("保存合并后的字幕...")
        merged_subs.save(output_subtitle_path, encoding='utf-8')

        print("合并完成")

    finally:
        # Release resources
        for clip in video_clips:
            clip.close()

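Two hedged notes on the deleted script, not statements about this commit: the flat `from moviepy import VideoFileClip, concatenate_videoclips` import matches moviepy 2.x (1.x exposed these names via moviepy.editor), and clips support the context-manager protocol, which closes them even if probing fails. A context-managed variant of the get_video_duration helper above, for illustration only:

from moviepy import VideoFileClip  # moviepy 2.x import path; 1.x used moviepy.editor


def get_video_duration(video_path: str) -> float:
    """Context-managed variant: the clip is closed even if reading the duration fails."""
    with VideoFileClip(video_path) as clip:
        return clip.duration
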
def main():
    # Example usage
    video_paths = [
        "temp/1.mp4",
        "temp/2.mp4",
        "temp/3.mp4",
        "temp/4.mp4",
        "temp/5.mp4",
    ]

    subtitle_paths = [
        "temp/1.srt",
        "temp/2.srt",
        "temp/3.srt",
        "temp/4.srt",
        "temp/5.srt",
    ]

    output_video_path = "temp/merged_video.mp4"
    output_subtitle_path = "temp/merged_subtitle.srt"

    merge_videos_and_subtitles(video_paths, subtitle_paths, output_video_path, output_subtitle_path)


if __name__ == "__main__":
    main()
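One further hedged note: adjust_subtitle_timing in the deleted file bumps each SubRipTime field by hand, while pysrt also ships SubRipFile.shift(), which applies the same offset in a single call. A minimal sketch assuming that API (shift_subtitles is an illustrative name, not code from the repository):

import pysrt


def shift_subtitles(subtitle_path: str, time_offset: float) -> pysrt.SubRipFile:
    """Offset every cue by time_offset seconds using pysrt's built-in shift()."""
    subs = pysrt.open(subtitle_path)
    whole_seconds = int(time_offset)
    # shift() propagates carries across hours, minutes, seconds and milliseconds.
    subs.shift(seconds=whole_seconds,
               milliseconds=int(round((time_offset - whole_seconds) * 1000)))
    return subs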