mirror of
https://github.com/linyqh/NarratoAI.git
synced 2025-12-14 13:02:50 +00:00
feat(webui): major rework, milestone 1
- Rework the audio settings panel: add voice volume, background music, and related settings
- Add background music file selection
- Improve subtitle settings with custom font and style support
- Adjust the video generation flow to use the new audio settings
- Update documentation examples to reflect the new features
This commit is contained in:
parent 67bee9d567
commit c065800072
.gitignore (vendored): 3 lines changed
@@ -31,4 +31,5 @@ resource/fonts/*.ttc
 resource/fonts/*.ttf
 resource/fonts/*.otf
 resource/srt/*.srt
 app/models/faster-whisper-large-v2/*
+app/models/bert/*
@@ -345,29 +345,29 @@ class VideoClipParams(BaseModel):
    # video_concat_mode: Optional[VideoConcatMode] = VideoConcatMode.random.value

    voice_name: Optional[str] = Field(default="zh-CN-YunjianNeural", description="语音名称")
    voice_volume: Optional[float] = Field(default=1.0, description="语音音量")
    voice_volume: Optional[float] = Field(default=1.0, description="解说语音音量")
    voice_rate: Optional[float] = Field(default=1.0, description="语速")
    voice_pitch: Optional[float] = Field(default=1.0, description="语调")

    bgm_name: Optional[str] = Field(default="random", description="背景音乐名称")
    bgm_type: Optional[str] = Field(default="random", description="背景音乐类型")
    bgm_file: Optional[str] = Field(default="", description="背景音乐文件")
    bgm_volume: Optional[float] = Field(default=0.2, description="背景音乐音量")

    subtitle_enabled: Optional[bool] = Field(default=True, description="是否启用字幕")
    subtitle_position: Optional[str] = Field(default="bottom", description="字幕位置")  # top, bottom, center
    font_name: Optional[str] = Field(default="STHeitiMedium.ttc", description="字体名称")
    text_fore_color: Optional[str] = Field(default="#FFFFFF", description="文字前景色")
    text_background_color: Optional[str] = Field(default="transparent", description="文字背景色")
    subtitle_enabled: bool = True
    font_name: str = "SimHei"  # 默认使用黑体
    font_size: int = 36
    text_fore_color: str = "white"  # 文本前景色
    text_back_color: Optional[str] = None  # 文本背景色
    stroke_color: str = "black"  # 描边颜色
    stroke_width: float = 1.5  # 描边宽度
    subtitle_position: str = "bottom"  # top, bottom, center, custom

    font_size: int = Field(default=60, description="文字大小")
    stroke_color: Optional[str] = Field(default="#000000", description="文字描边颜色")
    stroke_width: float = Field(default=1.5, description="文字描边宽度")
    custom_position: float = Field(default=70.0, description="自定义位置")
    n_threads: Optional[int] = Field(default=16, description="线程数")  # 线程数,有助于提升视频处理速度

    tts_volume: Optional[float] = Field(default=1.0, description="解说语音音量(后处理)")
    original_volume: Optional[float] = Field(default=1.0, description="视频原声音量")
    bgm_volume: Optional[float] = Field(default=0.6, description="背景音乐音量")

    n_threads: Optional[int] = 8  # 线程数,有助于提升视频处理速度
    tts_volume: float = 1.0  # TTS音频音量
    video_volume: float = 0.1  # 视频原声音量


class VideoTranscriptionRequest(BaseModel):
    video_name: str
@@ -376,5 +376,6 @@ class VideoTranscriptionRequest(BaseModel):
    class Config:
        arbitrary_types_allowed = True


class VideoTranscriptionResponse(BaseModel):
    transcription: str
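For orientation, a minimal sketch of how the new audio and subtitle fields of VideoClipParams might be filled in. The field names come from the model above; the concrete values are illustrative only, not defaults taken from this commit.

from app.models.schema import VideoClipParams

params = VideoClipParams(
    voice_name="zh-CN-YunjianNeural",
    voice_volume=1.0,          # narration (TTS) volume
    voice_rate=1.0,
    bgm_type="random",         # "random" picks a track from the songs directory
    bgm_file="",               # explicit file path when a custom track is wanted
    bgm_volume=0.3,
    subtitle_enabled=True,
    font_name="STHeitiMedium.ttc",
    font_size=60,
    stroke_color="#000000",
    stroke_width=1.5,
    custom_position=70.0,      # percent from the top when subtitle_position is "custom"
)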
BIN  app/services/SDP/generate_script_short.pyd (new file)
BIN  app/services/SDP/generate_script_short.so (new executable file)
BIN  app/services/SDP/utils/short_schema.pyd (new file)
BIN  app/services/SDP/utils/short_schema.so (new executable file)
BIN  app/services/SDP/utils/step1_subtitle_analyzer_openai.pyd (new file)
BIN  app/services/SDP/utils/step1_subtitle_analyzer_openai.so (new executable file)
BIN  app/services/SDP/utils/step2_subtitle_analyzer_bert.pyd (new file)
BIN  app/services/SDP/utils/step2_subtitle_analyzer_bert.so (new executable file)
BIN  app/services/SDP/utils/step3_fragment_check.pyd (new file)
BIN  app/services/SDP/utils/step3_fragment_check.so (new executable file)
BIN  app/services/SDP/utils/step4_text_generate.pyd (new file)
BIN  app/services/SDP/utils/step4_text_generate.so (new executable file)
BIN  app/services/SDP/utils/step5_merge_script.pyd (new file)
BIN  app/services/SDP/utils/step5_merge_script.so (new executable file)
BIN  app/services/SDP/utils/utils.pyd (new file)
BIN  app/services/SDP/utils/utils.so (new executable file)
Binary files not shown.
@ -157,55 +157,6 @@ def get_video_materials(task_id, params, video_terms, audio_duration):
|
||||
return downloaded_videos
|
||||
|
||||
|
||||
def generate_final_videos(
|
||||
task_id, params, downloaded_videos, audio_file, subtitle_path
|
||||
):
|
||||
final_video_paths = []
|
||||
combined_video_paths = []
|
||||
video_concat_mode = (
|
||||
params.video_concat_mode if params.video_count == 1 else VideoConcatMode.random
|
||||
)
|
||||
|
||||
_progress = 50
|
||||
for i in range(params.video_count):
|
||||
index = i + 1
|
||||
combined_video_path = path.join(
|
||||
utils.task_dir(task_id), f"combined-{index}.mp4"
|
||||
)
|
||||
logger.info(f"\n\n## combining video: {index} => {combined_video_path}")
|
||||
video.combine_videos(
|
||||
combined_video_path=combined_video_path,
|
||||
video_paths=downloaded_videos,
|
||||
audio_file=audio_file,
|
||||
video_aspect=params.video_aspect,
|
||||
video_concat_mode=video_concat_mode,
|
||||
max_clip_duration=params.video_clip_duration,
|
||||
threads=params.n_threads,
|
||||
)
|
||||
|
||||
_progress += 50 / params.video_count / 2
|
||||
sm.state.update_task(task_id, progress=_progress)
|
||||
|
||||
final_video_path = path.join(utils.task_dir(task_id), f"final-{index}.mp4")
|
||||
|
||||
logger.info(f"\n\n## generating video: {index} => {final_video_path}")
|
||||
video.generate_video(
|
||||
video_path=combined_video_path,
|
||||
audio_path=audio_file,
|
||||
subtitle_path=subtitle_path,
|
||||
output_file=final_video_path,
|
||||
params=params,
|
||||
)
|
||||
|
||||
_progress += 50 / params.video_count / 2
|
||||
sm.state.update_task(task_id, progress=_progress)
|
||||
|
||||
final_video_paths.append(final_video_path)
|
||||
combined_video_paths.append(combined_video_path)
|
||||
|
||||
return final_video_paths, combined_video_paths
|
||||
|
||||
|
||||
def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: dict):
|
||||
"""后台任务(自动剪辑视频进行剪辑)"""
|
||||
logger.info(f"\n\n## 开始任务: {task_id}")
|
||||
@ -253,7 +204,12 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di
|
||||
segment for segment in list_script
|
||||
if segment['OST'] in [0, 2]
|
||||
]
|
||||
# logger.debug(f"tts_segments: {tts_segments}")
|
||||
logger.debug(f"需要生成TTS的片段数: {len(tts_segments)}")
|
||||
|
||||
# 初始化音频文件路径
|
||||
audio_files = []
|
||||
final_audio = ""
|
||||
|
||||
if tts_segments:
|
||||
audio_files, sub_maker_list = voice.tts_multiple(
|
||||
task_id=task_id,
|
||||
@ -267,36 +223,54 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di
|
||||
sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
|
||||
logger.error("TTS转换音频失败, 可能是网络不可用! 如果您在中国, 请使用VPN.")
|
||||
return
|
||||
|
||||
if audio_files:
|
||||
logger.info(f"合并音频文件: {audio_files}")
|
||||
try:
|
||||
# 传入OST信息以便正确处理音频
|
||||
final_audio = audio_merger.merge_audio_files(
|
||||
task_id=task_id,
|
||||
audio_files=audio_files,
|
||||
total_duration=total_duration,
|
||||
list_script=list_script # 传入完整脚本以便处理OST
|
||||
)
|
||||
logger.info("音频文件合并成功")
|
||||
except Exception as e:
|
||||
logger.error(f"合并音频文件失败: {str(e)}")
|
||||
final_audio = ""
|
||||
else:
|
||||
audio_files = []
|
||||
|
||||
logger.info(f"合并音频文件:\n{audio_files}")
|
||||
# 传入OST信息以便正确处理音频
|
||||
final_audio = audio_merger.merge_audio_files(
|
||||
task_id=task_id,
|
||||
audio_files=audio_files,
|
||||
total_duration=total_duration,
|
||||
list_script=list_script # 传入完整脚本以便处理OST
|
||||
)
|
||||
# 如果没有需要生成TTS的片段,创建一个空白音频文件
|
||||
# 这样可以确保后续的音频处理能正确进行
|
||||
logger.info("没有需要生成TTS的片段,将保留原声和背景音乐")
|
||||
final_audio = path.join(utils.task_dir(task_id), "empty.mp3")
|
||||
try:
|
||||
from moviepy.editor import AudioClip
|
||||
# 创建一个与视频等长的空白音频
|
||||
empty_audio = AudioClip(make_frame=lambda t: 0, duration=total_duration)
|
||||
empty_audio.write_audiofile(final_audio, fps=44100)
|
||||
logger.info(f"已创建空白音频文件: {final_audio}")
|
||||
except Exception as e:
|
||||
logger.error(f"创建空白音频文件失败: {str(e)}")
|
||||
final_audio = ""
|
||||
|
||||
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=30)
|
||||
|
||||
    # 只为OST=0或2的片段生成字幕
    subtitle_path = ""
    if params.subtitle_enabled:
-        subtitle_path = path.join(utils.task_dir(task_id), f"subtitle.srt")
-        subtitle_provider = config.app.get("subtitle_provider", "").strip().lower()
-        logger.info(f"\n\n## 3. 生成字幕、提供程序是: {subtitle_provider}")
-
-        subtitle.create(
-            audio_file=final_audio,
-            subtitle_file=subtitle_path,
-        )
-
-        subtitle_lines = subtitle.file_to_subtitles(subtitle_path)
-        if not subtitle_lines:
-            logger.warning(f"字幕文件无效: {subtitle_path}")
-            subtitle_path = ""
+        if audio_files:
+            subtitle_path = path.join(utils.task_dir(task_id), f"subtitle.srt")
+            subtitle_provider = config.app.get("subtitle_provider", "").strip().lower()
+            logger.info(f"\n\n## 3. 生成字幕、提供程序是: {subtitle_provider}")
+
+            subtitle.create(
+                audio_file=final_audio,
+                subtitle_file=subtitle_path,
+            )
+
+            subtitle_lines = subtitle.file_to_subtitles(subtitle_path)
+            if not subtitle_lines:
+                logger.warning(f"字幕文件无效: {subtitle_path}")
+                subtitle_path = ""
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=40)
|
||||
|
||||
@@ -335,14 +309,44 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: dict):
        final_video_path = path.join(utils.task_dir(task_id), f"final-{index}.mp4")

        logger.info(f"\n\n## 6. 最后合成: {index} => {final_video_path}")
-        # 传入OST信息以便正确处理音频和视频
-        video.generate_video_v2(
-            video_path=combined_video_path,
-            audio_path=final_audio,
-            subtitle_path=subtitle_path,
-            output_file=final_video_path,
-            params=params,
-            list_script=list_script  # 传入完整脚本以便处理OST
-        )
+
+        # 获取背景音乐
+        bgm_path = None
+        if params.bgm_type or params.bgm_file:
+            try:
+                bgm_path = utils.get_bgm_file(bgm_type=params.bgm_type, bgm_file=params.bgm_file)
+                if bgm_path:
+                    logger.info(f"使用背景音乐: {bgm_path}")
+            except Exception as e:
+                logger.error(f"获取背景音乐失败: {str(e)}")
+
+        # 示例:自定义字幕样式
+        subtitle_style = {
+            'fontsize': params.font_size,          # 字体大小
+            'color': params.text_fore_color,       # 字体颜色
+            'stroke_color': params.stroke_color,   # 描边颜色
+            'stroke_width': params.stroke_width,   # 描边宽度, 范围0-10
+            'bg_color': params.text_back_color,    # 半透明黑色背景
+            'position': ('center', 0.2),           # 距离顶部60%的位置
+            'method': 'caption'                    # 渲染方法
+        }
+
+        # 示例:自定义音量配置
+        volume_config = {
+            'original': params.original_volume,    # 原声音量
+            'bgm': params.bgm_volume,              # BGM音量
+            'narration': params.tts_volume         # 解说音量
+        }
+        font_path = utils.font_dir(params.font_name)
+        video.generate_video_v3(
+            video_path=combined_video_path,
+            subtitle_path=subtitle_path,
+            bgm_path=bgm_path,
+            narration_path=final_audio,
+            output_path=final_video_path,
+            volume_config=volume_config,           # 添加音量配置
+            subtitle_style=subtitle_style,
+            font_path=font_path
+        )

        _progress += 50 / 2
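To make the volume_config contract concrete, here is a hedged sketch of how the three values end up being applied inside generate_video_v3 further down in this diff; the clip paths are placeholders, and volumex is the moviepy call the function relies on.

from moviepy.editor import AudioFileClip, CompositeAudioClip, VideoFileClip

video = VideoFileClip("combined-1.mp4")        # placeholder input video
bgm = AudioFileClip("bgm.mp3")                 # placeholder background music track
narration = AudioFileClip("final_audio.mp3")   # placeholder narration track

volume_config = {'original': 0.8, 'bgm': 0.2, 'narration': 1.0}

tracks = []
if video.audio is not None:
    tracks.append(video.audio.volumex(volume_config['original']))   # original sound
tracks.append(bgm.volumex(volume_config['bgm']))                    # background music
tracks.append(narration.volumex(volume_config['narration']))        # narration
mixed_audio = CompositeAudioClip(tracks)       # tracks are layered, not normalized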
@ -361,6 +365,40 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di
|
||||
return kwargs
|
||||
|
||||
|
||||
def validate_params(video_path, audio_path, output_file, params):
|
||||
"""
|
||||
验证输入参数
|
||||
Args:
|
||||
video_path: 视频文件路径
|
||||
audio_path: 音频文件路径(可以为空字符串)
|
||||
output_file: 输出文件路径
|
||||
params: 视频参数
|
||||
|
||||
Raises:
|
||||
FileNotFoundError: 文件不存在时抛出
|
||||
ValueError: 参数无效时抛出
|
||||
"""
|
||||
if not video_path:
|
||||
raise ValueError("视频路径不能为空")
|
||||
if not os.path.exists(video_path):
|
||||
raise FileNotFoundError(f"视频文件不存在: {video_path}")
|
||||
|
||||
# 如果提供了音频路径,则验证文件是否存在
|
||||
if audio_path and not os.path.exists(audio_path):
|
||||
raise FileNotFoundError(f"音频文件不存在: {audio_path}")
|
||||
|
||||
if not output_file:
|
||||
raise ValueError("输出文件路径不能为空")
|
||||
|
||||
# 确保输出目录存在
|
||||
output_dir = os.path.dirname(output_file)
|
||||
if not os.path.exists(output_dir):
|
||||
os.makedirs(output_dir)
|
||||
|
||||
if not params:
|
||||
raise ValueError("视频参数不能为空")
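A small, hypothetical caller for validate_params; the paths are placeholders, and an empty audio_path is deliberately allowed, as documented above.

try:
    validate_params(
        video_path="storage/tasks/demo/combined-1.mp4",   # placeholder
        audio_path="",                                     # empty string skips the audio check
        output_file="storage/tasks/demo/final-1.mp4",      # parent directory is created if missing
        params=params,
    )
except (FileNotFoundError, ValueError) as e:
    logger.error(f"parameter validation failed: {e}")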
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# task_id = "test123"
|
||||
# subclip_path_videos = {'00:41-01:58': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-00_41-01_58.mp4',
|
||||
|
||||
@ -1,186 +1,22 @@
|
||||
import re
|
||||
import os
|
||||
import glob
|
||||
import random
|
||||
from typing import List
|
||||
from typing import Union
|
||||
import traceback
|
||||
|
||||
import pysrt
|
||||
from typing import Optional
|
||||
from typing import List
|
||||
from loguru import logger
|
||||
from moviepy.editor import *
|
||||
from moviepy.video.tools.subtitles import SubtitlesClip
|
||||
from PIL import ImageFont
|
||||
from contextlib import contextmanager
|
||||
|
||||
from app.models import const
|
||||
from app.models.schema import MaterialInfo, VideoAspect, VideoConcatMode, VideoParams, VideoClipParams
|
||||
from app.utils import utils
|
||||
from moviepy.editor import (
|
||||
VideoFileClip,
|
||||
AudioFileClip,
|
||||
TextClip,
|
||||
CompositeVideoClip,
|
||||
CompositeAudioClip
|
||||
)
|
||||
|
||||
|
||||
def get_bgm_file(bgm_type: str = "random", bgm_file: str = ""):
|
||||
"""
|
||||
获取背景音乐文件路径
|
||||
Args:
|
||||
bgm_type: 背景音乐类型,可选值: random(随机), ""(无背景音乐)
|
||||
bgm_file: 指定的背景音乐文件路径
|
||||
|
||||
Returns:
|
||||
str: 背景音乐文件路径
|
||||
"""
|
||||
if not bgm_type:
|
||||
return ""
|
||||
|
||||
if bgm_file and os.path.exists(bgm_file):
|
||||
return bgm_file
|
||||
|
||||
if bgm_type == "random":
|
||||
song_dir = utils.song_dir()
|
||||
|
||||
# 检查目录是否存在
|
||||
if not os.path.exists(song_dir):
|
||||
logger.warning(f"背景音乐目录不存在: {song_dir}")
|
||||
return ""
|
||||
|
||||
# 支持 mp3 和 flac 格式
|
||||
mp3_files = glob.glob(os.path.join(song_dir, "*.mp3"))
|
||||
flac_files = glob.glob(os.path.join(song_dir, "*.flac"))
|
||||
files = mp3_files + flac_files
|
||||
|
||||
# 检查是否找到音乐文件
|
||||
if not files:
|
||||
logger.warning(f"在目录 {song_dir} 中没有找到 MP3 或 FLAC 文件")
|
||||
return ""
|
||||
|
||||
return random.choice(files)
|
||||
|
||||
return ""
|
||||
|
||||
|
||||
def combine_videos(
|
||||
combined_video_path: str,
|
||||
video_paths: List[str],
|
||||
audio_file: str,
|
||||
video_aspect: VideoAspect = VideoAspect.portrait,
|
||||
video_concat_mode: VideoConcatMode = VideoConcatMode.random,
|
||||
max_clip_duration: int = 5,
|
||||
threads: int = 2,
|
||||
) -> str:
|
||||
"""
|
||||
合并多个视频片段
|
||||
Args:
|
||||
combined_video_path: 合并后的视频保存路径
|
||||
video_paths: 待合并的视频路径列表
|
||||
audio_file: 音频文件路径
|
||||
video_aspect: 视频宽高比
|
||||
video_concat_mode: 视频拼接模式(随机/顺序)
|
||||
max_clip_duration: 每个片段的最大时长(秒)
|
||||
threads: 处理线程数
|
||||
|
||||
Returns:
|
||||
str: 合并后的视频路径
|
||||
"""
|
||||
audio_clip = AudioFileClip(audio_file)
|
||||
audio_duration = audio_clip.duration
|
||||
logger.info(f"音频时长: {audio_duration} 秒")
|
||||
# 每个片段的所需时长
|
||||
req_dur = audio_duration / len(video_paths)
|
||||
req_dur = max_clip_duration
|
||||
logger.info(f"每个片段最大时长: {req_dur} 秒")
|
||||
output_dir = os.path.dirname(combined_video_path)
|
||||
|
||||
aspect = VideoAspect(video_aspect)
|
||||
video_width, video_height = aspect.to_resolution()
|
||||
|
||||
clips = []
|
||||
video_duration = 0
|
||||
|
||||
raw_clips = []
|
||||
for video_path in video_paths:
|
||||
clip = VideoFileClip(video_path).without_audio()
|
||||
clip_duration = clip.duration
|
||||
start_time = 0
|
||||
|
||||
while start_time < clip_duration:
|
||||
end_time = min(start_time + max_clip_duration, clip_duration)
|
||||
split_clip = clip.subclip(start_time, end_time)
|
||||
raw_clips.append(split_clip)
|
||||
# logger.info(f"从 {start_time:.2f} 到 {end_time:.2f}, 片段时长 {clip_duration:.2f}, 分割片段时长 {split_clip.duration:.2f}")
|
||||
start_time = end_time
|
||||
if video_concat_mode.value == VideoConcatMode.sequential.value:
|
||||
break
|
||||
|
||||
# 随机视频片段顺序
|
||||
if video_concat_mode.value == VideoConcatMode.random.value:
|
||||
random.shuffle(raw_clips)
|
||||
|
||||
# 添加下载的片段,直到音频时长(max_duration)达到
|
||||
while video_duration < audio_duration:
|
||||
for clip in raw_clips:
|
||||
# 检查片段是否比剩余音频时长长
|
||||
if (audio_duration - video_duration) < clip.duration:
|
||||
clip = clip.subclip(0, (audio_duration - video_duration))
|
||||
# 仅当计算的片段时长(req_dur)小于实际片段时长时,缩短片段
|
||||
elif req_dur < clip.duration:
|
||||
clip = clip.subclip(0, req_dur)
|
||||
clip = clip.set_fps(30)
|
||||
|
||||
# Not all videos are same size, so we need to resize them
|
||||
clip_w, clip_h = clip.size
|
||||
if clip_w != video_width or clip_h != video_height:
|
||||
clip_ratio = clip.w / clip.h
|
||||
video_ratio = video_width / video_height
|
||||
|
||||
if clip_ratio == video_ratio:
|
||||
# 等比例缩放
|
||||
clip = clip.resize((video_width, video_height))
|
||||
else:
|
||||
# 等比缩放视频
|
||||
if clip_ratio > video_ratio:
|
||||
# 按照目标宽度等比缩放
|
||||
scale_factor = video_width / clip_w
|
||||
else:
|
||||
# 按照目标高度等比缩放
|
||||
scale_factor = video_height / clip_h
|
||||
|
||||
new_width = int(clip_w * scale_factor)
|
||||
new_height = int(clip_h * scale_factor)
|
||||
clip_resized = clip.resize(newsize=(new_width, new_height))
|
||||
|
||||
background = ColorClip(
|
||||
size=(video_width, video_height), color=(0, 0, 0)
|
||||
)
|
||||
clip = CompositeVideoClip(
|
||||
[
|
||||
background.set_duration(clip.duration),
|
||||
clip_resized.set_position("center"),
|
||||
]
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f"调整视频尺寸为 {video_width} x {video_height}, 片段尺寸: {clip_w} x {clip_h}"
|
||||
)
|
||||
|
||||
if clip.duration > max_clip_duration:
|
||||
clip = clip.subclip(0, max_clip_duration)
|
||||
|
||||
clips.append(clip)
|
||||
video_duration += clip.duration
|
||||
|
||||
video_clip = concatenate_videoclips(clips)
|
||||
video_clip = video_clip.set_fps(30)
|
||||
logger.info("writing")
|
||||
|
||||
video_clip.write_videofile(
|
||||
filename=combined_video_path,
|
||||
threads=threads,
|
||||
logger=None,
|
||||
temp_audiofile_path=output_dir,
|
||||
audio_codec="aac",
|
||||
fps=30,
|
||||
)
|
||||
video_clip.close()
|
||||
logger.success("completed")
|
||||
return combined_video_path
|
||||
from app.models.schema import VideoAspect
|
||||
|
||||
|
||||
def wrap_text(text, max_width, font, fontsize=60):
|
||||
@ -269,259 +105,6 @@ def manage_clip(clip):
|
||||
del clip
|
||||
|
||||
|
||||
def generate_video_v2(
|
||||
video_path: str,
|
||||
audio_path: str,
|
||||
subtitle_path: str,
|
||||
output_file: str,
|
||||
list_script: list,
|
||||
params: Union[VideoParams, VideoClipParams],
|
||||
progress_callback=None,
|
||||
):
|
||||
"""
|
||||
合并所有素材
|
||||
Args:
|
||||
video_path: 视频路径
|
||||
audio_path: 单个音频文件路径
|
||||
subtitle_path: 字幕文件路径
|
||||
output_file: 输出文件路径
|
||||
params: 视频参数
|
||||
progress_callback: 进度回调函数,接收 0-100 的进度值
|
||||
|
||||
Returns:
|
||||
|
||||
"""
|
||||
total_steps = 4
|
||||
current_step = 0
|
||||
|
||||
def update_progress(step_name):
|
||||
nonlocal current_step
|
||||
current_step += 1
|
||||
if progress_callback:
|
||||
progress_callback(int(current_step * 100 / total_steps))
|
||||
logger.info(f"完成步骤: {step_name}")
|
||||
|
||||
try:
|
||||
validate_params(video_path, audio_path, output_file, params)
|
||||
|
||||
with manage_clip(VideoFileClip(video_path)) as video_clip:
|
||||
aspect = VideoAspect(params.video_aspect)
|
||||
video_width, video_height = aspect.to_resolution()
|
||||
|
||||
logger.info(f"开始,视频尺寸: {video_width} x {video_height}")
|
||||
logger.info(f" ① 视频: {video_path}")
|
||||
logger.info(f" ② 音频: {audio_path}")
|
||||
logger.info(f" ③ 字幕: {subtitle_path}")
|
||||
logger.info(f" ④ 输出: {output_file}")
|
||||
|
||||
output_dir = os.path.dirname(output_file)
|
||||
update_progress("初始化完成")
|
||||
|
||||
# 字体设置
|
||||
font_path = ""
|
||||
if params.subtitle_enabled:
|
||||
if not params.font_name:
|
||||
params.font_name = "STHeitiMedium.ttc"
|
||||
font_path = os.path.join(utils.font_dir(), params.font_name)
|
||||
if os.name == "nt":
|
||||
font_path = font_path.replace("\\", "/")
|
||||
logger.info(f"使用字体: {font_path}")
|
||||
|
||||
def create_text_clip(subtitle_item):
|
||||
phrase = subtitle_item[1]
|
||||
max_width = video_width * 0.9
|
||||
wrapped_txt, txt_height = wrap_text(
|
||||
phrase, max_width=max_width, font=font_path, fontsize=params.font_size
|
||||
)
|
||||
_clip = TextClip(
|
||||
wrapped_txt,
|
||||
font=font_path,
|
||||
fontsize=params.font_size,
|
||||
color=params.text_fore_color,
|
||||
bg_color=params.text_background_color,
|
||||
stroke_color=params.stroke_color,
|
||||
stroke_width=params.stroke_width,
|
||||
print_cmd=False,
|
||||
)
|
||||
duration = subtitle_item[0][1] - subtitle_item[0][0]
|
||||
_clip = _clip.set_start(subtitle_item[0][0])
|
||||
_clip = _clip.set_end(subtitle_item[0][1])
|
||||
_clip = _clip.set_duration(duration)
|
||||
|
||||
if params.subtitle_position == "bottom":
|
||||
_clip = _clip.set_position(("center", video_height * 0.95 - _clip.h))
|
||||
elif params.subtitle_position == "top":
|
||||
_clip = _clip.set_position(("center", video_height * 0.05))
|
||||
elif params.subtitle_position == "custom":
|
||||
margin = 10
|
||||
max_y = video_height - _clip.h - margin
|
||||
min_y = margin
|
||||
custom_y = (video_height - _clip.h) * (params.custom_position / 100)
|
||||
custom_y = max(min_y, min(custom_y, max_y))
|
||||
_clip = _clip.set_position(("center", custom_y))
|
||||
else: # center
|
||||
_clip = _clip.set_position(("center", "center"))
|
||||
return _clip
|
||||
|
||||
update_progress("字体设置完成")
|
||||
|
||||
# 处理音频
|
||||
original_audio = video_clip.audio
|
||||
video_duration = video_clip.duration
|
||||
new_audio = AudioFileClip(audio_path)
|
||||
final_audio = process_audio_tracks(original_audio, new_audio, params, video_duration)
|
||||
update_progress("音频处理完成")
|
||||
|
||||
# 处理字幕
|
||||
if subtitle_path and os.path.exists(subtitle_path):
|
||||
video_clip = process_subtitles(subtitle_path, video_clip, video_duration, create_text_clip)
|
||||
update_progress("字幕处理完成")
|
||||
|
||||
# 合并音频和导出
|
||||
logger.info("开始导出视频 (此步骤耗时较长请耐心等待)")
|
||||
video_clip = video_clip.set_audio(final_audio)
|
||||
video_clip.write_videofile(
|
||||
output_file,
|
||||
audio_codec="aac",
|
||||
temp_audiofile=os.path.join(output_dir, "temp-audio.m4a"),
|
||||
threads=params.n_threads,
|
||||
logger=None,
|
||||
fps=30,
|
||||
)
|
||||
|
||||
except FileNotFoundError as e:
|
||||
logger.error(f"文件不存在: {str(e)}")
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"视频生成失败: {str(e)}")
|
||||
raise
|
||||
finally:
|
||||
logger.success("完成")
|
||||
|
||||
|
||||
def process_audio_tracks(original_audio, new_audio, params, video_duration):
|
||||
"""
|
||||
处理所有音轨(原声、配音、背景音乐)
|
||||
Args:
|
||||
original_audio: 原始音频
|
||||
new_audio: 新音频
|
||||
params: 视频参数
|
||||
video_duration: 视频时长
|
||||
|
||||
Returns:
|
||||
CompositeAudioClip: 合成后的音频
|
||||
"""
|
||||
audio_tracks = []
|
||||
|
||||
if original_audio is not None:
|
||||
audio_tracks.append(original_audio)
|
||||
|
||||
new_audio = new_audio.volumex(params.voice_volume)
|
||||
audio_tracks.append(new_audio)
|
||||
|
||||
# 处理背景音乐
|
||||
bgm_file = get_bgm_file(bgm_type=params.bgm_type, bgm_file=params.bgm_file)
|
||||
if bgm_file:
|
||||
try:
|
||||
bgm_clip = AudioFileClip(bgm_file).volumex(params.bgm_volume).audio_fadeout(3)
|
||||
bgm_clip = afx.audio_loop(bgm_clip, duration=video_duration)
|
||||
audio_tracks.append(bgm_clip)
|
||||
except Exception as e:
|
||||
logger.error(f"添加背景音乐失败: {str(e)}")
|
||||
|
||||
return CompositeAudioClip(audio_tracks) if audio_tracks else new_audio
|
||||
|
||||
|
||||
def process_subtitles(subtitle_path, video_clip, video_duration, create_text_clip):
|
||||
"""
|
||||
处理字幕
|
||||
Args:
|
||||
subtitle_path: 字幕文件路径
|
||||
video_clip: 视频片段
|
||||
video_duration: 视频时长
|
||||
create_text_clip: 创建文本片段的回调函数
|
||||
|
||||
Returns:
|
||||
CompositeVideoClip: 添加字幕后的视频
|
||||
"""
|
||||
if not (subtitle_path and os.path.exists(subtitle_path)):
|
||||
return video_clip
|
||||
|
||||
sub = SubtitlesClip(subtitles=subtitle_path, encoding="utf-8")
|
||||
text_clips = []
|
||||
|
||||
for item in sub.subtitles:
|
||||
clip = create_text_clip(subtitle_item=item)
|
||||
|
||||
# 时间范围调整
|
||||
start_time = max(clip.start, 0)
|
||||
if start_time >= video_duration:
|
||||
continue
|
||||
|
||||
end_time = min(clip.end, video_duration)
|
||||
clip = clip.set_start(start_time).set_end(end_time)
|
||||
text_clips.append(clip)
|
||||
|
||||
logger.info(f"处理了 {len(text_clips)} 段字幕")
|
||||
return CompositeVideoClip([video_clip, *text_clips])
|
||||
|
||||
|
||||
def preprocess_video(materials: List[MaterialInfo], clip_duration=4):
|
||||
"""
|
||||
预处理视频素材
|
||||
Args:
|
||||
materials: 素材信息列表
|
||||
clip_duration: 片段时长(秒)
|
||||
|
||||
Returns:
|
||||
List[MaterialInfo]: 处理后的素材信息列表
|
||||
"""
|
||||
for material in materials:
|
||||
if not material.url:
|
||||
continue
|
||||
|
||||
ext = utils.parse_extension(material.url)
|
||||
try:
|
||||
clip = VideoFileClip(material.url)
|
||||
except Exception:
|
||||
clip = ImageClip(material.url)
|
||||
|
||||
width = clip.size[0]
|
||||
height = clip.size[1]
|
||||
if width < 480 or height < 480:
|
||||
logger.warning(f"video is too small, width: {width}, height: {height}")
|
||||
continue
|
||||
|
||||
if ext in const.FILE_TYPE_IMAGES:
|
||||
logger.info(f"processing image: {material.url}")
|
||||
# 创建一个图片剪辑,并设置持续时间为3秒钟
|
||||
clip = (
|
||||
ImageClip(material.url)
|
||||
.set_duration(clip_duration)
|
||||
.set_position("center")
|
||||
)
|
||||
# 使用resize方法来添加缩放效果。这里使用了lambda函数来使得缩放效果随时间变化。
|
||||
# 假设我们想要从原始大小逐渐放大到120%的大小。
|
||||
# t代表当前时间,clip.duration为视频总时长,这里是3秒。
|
||||
# 注意:1 表示100%的大小所以1.2表示120%的大小
|
||||
zoom_clip = clip.resize(
|
||||
lambda t: 1 + (clip_duration * 0.03) * (t / clip.duration)
|
||||
)
|
||||
|
||||
# 如果需要,可以创建一个包含缩放剪辑的复合频剪辑
|
||||
# (这在您想要在视频中添加其他元素时非常有用)
|
||||
final_clip = CompositeVideoClip([zoom_clip])
|
||||
|
||||
# 输出视频
|
||||
video_file = f"{material.url}.mp4"
|
||||
final_clip.write_videofile(video_file, fps=30, logger=None)
|
||||
final_clip.close()
|
||||
del final_clip
|
||||
material.url = video_file
|
||||
logger.success(f"completed: {video_file}")
|
||||
return materials
|
||||
|
||||
|
||||
def combine_clip_videos(combined_video_path: str,
|
||||
video_paths: List[str],
|
||||
video_ost_list: List[int],
|
||||
@ -640,101 +223,220 @@ def resize_video_with_padding(clip, target_width: int, target_height: int):
|
||||
])
|
||||
|
||||
|
||||
def validate_params(video_path, audio_path, output_file, params):
|
||||
def loop_audio_clip(audio_clip: AudioFileClip, target_duration: float) -> AudioFileClip:
|
||||
"""
|
||||
验证输入参数
|
||||
Args:
|
||||
video_path: 视频文件路径
|
||||
audio_path: 音频文件路径
|
||||
output_file: 输出文件路径
|
||||
params: 视频参数
|
||||
循环音频片段直到达到目标时长
|
||||
|
||||
Raises:
|
||||
FileNotFoundError: 文件不存在时抛出
|
||||
ValueError: 参数无效时抛出
|
||||
参数:
|
||||
audio_clip: 原始音频片段
|
||||
target_duration: 目标时长(秒)
|
||||
返回:
|
||||
循环后的音频片段
|
||||
"""
|
||||
# 计算需要循环的次数
|
||||
loops_needed = int(target_duration / audio_clip.duration) + 1
|
||||
|
||||
# 创建足够长的音频
|
||||
extended_audio = audio_clip
|
||||
for _ in range(loops_needed - 1):
|
||||
extended_audio = CompositeAudioClip([
|
||||
extended_audio,
|
||||
audio_clip.set_start(extended_audio.duration)
|
||||
])
|
||||
|
||||
# 裁剪到目标时长
|
||||
return extended_audio.subclip(0, target_duration)
|
||||
|
||||
|
||||
def generate_video_v3(
|
||||
video_path: str,
|
||||
subtitle_path: Optional[str] = None,
|
||||
bgm_path: Optional[str] = None,
|
||||
narration_path: Optional[str] = None,
|
||||
output_path: str = "output.mp4",
|
||||
# 音量相关参数
|
||||
volume_config: dict = None,
|
||||
# 字幕相关参数
|
||||
subtitle_style: dict = None,
|
||||
font_path: Optional[str] = None
|
||||
) -> None:
|
||||
"""
|
||||
合并视频素材,包括视频、字幕、BGM和解说音频
|
||||
|
||||
参数:
|
||||
video_path: 原视频文件路径
|
||||
subtitle_path: SRT字幕文件路径(可选)
|
||||
bgm_path: 背景音乐文件路径(可选)
|
||||
narration_path: 解说音频文件路径(可选)
|
||||
output_path: 输出文件路径
|
||||
volume_config: 音量配置字典,可包含以下键:
|
||||
- original: 原声音量(0-1),默认1.0
|
||||
- bgm: BGM音量(0-1),默认0.3
|
||||
- narration: 解说音量(0-1),默认1.0
|
||||
subtitle_style: 字幕样式配置字典,可包含以下键:
|
||||
- font: 字体名称
|
||||
- fontsize: 字体大小
|
||||
- color: 字体颜色
|
||||
- stroke_color: 描边颜色
|
||||
- stroke_width: 描边宽度
|
||||
- bg_color: 背景色
|
||||
- position: 位置支持 'top'/'center'/'bottom' 或 (x,y) 坐标
|
||||
- method: 文字渲染方法
|
||||
font_path: 字体文件路径(.ttf/.otf 等格式)
|
||||
"""
|
||||
# 检查视频文件是否存在
|
||||
if not os.path.exists(video_path):
|
||||
raise FileNotFoundError(f"视频文件不存在: {video_path}")
|
||||
|
||||
if not os.path.exists(audio_path):
|
||||
raise FileNotFoundError(f"音频文件不存在: {audio_path}")
|
||||
# 设置默认音量配置
|
||||
default_volume = {
|
||||
'original': 1.0, # 原声音量
|
||||
'bgm': 0.3, # BGM音量
|
||||
'narration': 1.0 # 解说音量
|
||||
}
|
||||
|
||||
output_dir = os.path.dirname(output_file)
|
||||
if not os.path.exists(output_dir):
|
||||
raise FileNotFoundError(f"输出目录不存在: {output_dir}")
|
||||
# 更新音量配置
|
||||
if volume_config:
|
||||
default_volume.update(volume_config)
|
||||
|
||||
if not hasattr(params, 'video_aspect'):
|
||||
raise ValueError("params 缺少必要参数 video_aspect")
|
||||
# 加载视频
|
||||
video = VideoFileClip(video_path)
|
||||
subtitle_clips = []
|
||||
|
||||
# 处理字幕(如果提供)
|
||||
if subtitle_path:
|
||||
if os.path.exists(subtitle_path):
|
||||
# 检查字体文件
|
||||
if font_path and not os.path.exists(font_path):
|
||||
logger.info(f"警告:字体文件不存在: {font_path},将使用系统默认字体")
|
||||
font_path = 'Arial'
|
||||
|
||||
if __name__ == "__main__":
|
||||
combined_video_path = "../../storage/tasks/123/combined.mp4"
|
||||
# 设置默认字幕样式
|
||||
default_style = {
|
||||
'font': font_path if font_path else 'Arial',
|
||||
'fontsize': 24,
|
||||
'color': 'white',
|
||||
'stroke_color': 'black',
|
||||
'stroke_width': 1,
|
||||
'bg_color': None,
|
||||
'position': ('center', 'bottom'),
|
||||
'method': 'label'
|
||||
}
|
||||
|
||||
video_paths = ['../../storage/temp/clip_video/0b545e689a182a91af2163c7c0ca7ca3/vid-00-00-10_000-00-00-43_039.mp4',
|
||||
'../../storage/temp/clip_video/0b545e689a182a91af2163c7c0ca7ca3/vid-00-00-45_439-00-01-01_600.mp4',
|
||||
'../../storage/temp/clip_video/0b545e689a182a91af2163c7c0ca7ca3/vid-00-01-07_920-00-01-25_719.mp4',
|
||||
'../../storage/temp/clip_video/0b545e689a182a91af2163c7c0ca7ca3/vid-00-01-36_959-00-01-53_719.mp4']
|
||||
video_ost_list = [2, 2, 2, 2]
|
||||
list_script = [
|
||||
{
|
||||
"timestamp": "00:10-00:43",
|
||||
"picture": "好的,以下是视频画面的客观描述:\n\n视频显示一个男人在一个树木繁茂的地区,靠近一个泥土斜坡他穿着一件深色T恤、卡其色长裤和登山靴。他背着一个军绿色背包,里面似乎装有头和其他工具。\n\n第一个镜头显示该男子从远处走近斜坡,背对着镜头。下一个镜头特写显示了的背包,一个镐头从背包中伸出来。下一个镜头显示该男子用镐头敲打斜坡。下一个镜头是该男子脚上的特写镜头,他穿着登山靴,正站在泥土斜坡上。最后一个镜显示该男子在斜坡上,仔细地拨开树根和泥土。周围的环境是树木繁茂的,阳光透过树叶照射下来。土壤是浅棕色的,斜坡上有许多树根和植被。",
|
||||
"narration": "(接上文)好吧,今天我们的男主角,背着一个看似随时要发射军绿色背包,竟然化身“泥土探险家”,在斜坡上挥舞着镐头!他这是准备挖宝还是给树根做个“美容”?阳光洒下来,简直是自然界的聚光灯,仿佛在说:“快来看看,这位勇士要挑战泥土极限!”我只能默默想,如果树根能说话,它们一定会喊:“别打我,我还有家人!”这就是生活,总有些搞笑的瞬间等着我们去发现!",
|
||||
"OST": 2,
|
||||
"new_timestamp": "00:00:00,000-00:00:33,000"
|
||||
},
|
||||
{
|
||||
"timestamp": "00:45-01:01",
|
||||
"picture": "好的以下是视频画面的客观描述:\n\n视频显示了一个人在森林里挖掘。\n\n第一个镜头是地面特写,显示出松<EFBFBD><EFBFBD>的泥土、碎石和落叶。光线照在部分区域。\n\n第二个镜头中,一模糊不清的蹲一个树根旁挖掘,一个橄榄绿色的背包放在地上。树根缠绕着常春藤。\n\n第三个镜头显示该人在一个更开阔的区域挖掘,那里有一些树根,以及部分倒的树干。他起来像是在挖掘一个较大的坑。\n\n第四个镜头是特写镜头,显示该人用工具清理土坑的墙壁。\n\n第五个镜头是土坑内部的特写镜头,可以看到土质的纹理,有一些小树根和它植被的残留物。",
|
||||
"narration": "现在,这位勇敢的挖掘者就像个“现代版的土豆农夫”,在林里开辟新天地。的目标是什么?挖一个宝藏还块“树根披萨”?小心哦,别让树根追着你喊:“不要挖我,我也是有故事的!”",
|
||||
"OST": 2,
|
||||
"new_timestamp": "00:00:33,000-00:00:49,000"
|
||||
},
|
||||
{
|
||||
"timestamp": "01:07-01:25",
|
||||
"picture": "好,以下是视频画面的客观描述:\n\n画面1:特写镜头,显示出一丛带有水珠的深绿色灌木叶片。叶片呈椭圆形,边缘光滑。背景是树根和泥土。\n\n画面2:一个留着胡子的男人正在一个森林中土坑里挖掘。他穿着黑色T恤和卡其色裤子,跪在地,用具挖掘泥土。周围环绕着树木、树根和灌木。一个倒下的树干横跨土坑上方。\n\n画面3:同一个男人坐在他刚才挖的坑的边缘,看着前方。他的表情似乎略带沉思。背景与画面2相同。\n\n画面4:一个广角镜头显示出他挖出的坑。这是一个不规则形状的土坑,在树木繁茂的斜坡上。土壤呈深棕色,可见树根。\n\n画面5:同一个男人跪在地上,用一把小斧头砍一根木头。他穿着与前几个画面相同的衣服。地面上覆盖着落叶。周围是树木和灌木。",
|
||||
"narration": "“哎呀,这片灌木叶子滴水如雨,感觉像是大自然的洗发水广告!但我这位‘挖宝达人’似乎更适合拍个‘森林里的单身狗’真人秀。等会儿,我要给树根唱首歌,听说它们爱音乐!”",
|
||||
"OST": 2,
|
||||
"new_timestamp": "00:00:49,000-00:01:07,000"
|
||||
},
|
||||
{
|
||||
"timestamp": "01:36-01:53",
|
||||
"picture": "好的,以下是视频画面内容的客观描述:\n\n视频包含三个镜头:\n\n**镜头一:**个小型、浅水池塘,位于树林中。池塘的水看起来浑浊,呈绿褐色。池塘周围遍布泥土和落叶。多根树枝和树干横跨池塘,部分浸没在水中。周围的植被茂密主要是深色树木和灌木。\n\n**镜头二:**距拍摄树深处,阳光透过树叶洒落在植被上。镜头中可见粗大的树干、树枝和各种绿叶植物。部分树枝似乎被砍断,切口可见。\n\n**镜头三:**近距离特写镜头,聚焦在树枝和绿叶上。叶片呈圆形,颜色为鲜绿色,有些叶片上有缺损。树枝颜色较深,呈现深褐色。背景是模糊的树林。\n",
|
||||
"narration": "“好吧,看来我们的‘挖宝达人’终于找到了一‘宝藏’——一个色泽如同绿豆汤的池塘!我敢打赌,这里不仅是小鱼儿的游乐场更是树枝们的‘水疗中心’!下次来这里,我得带上浮潜装备!”",
|
||||
"OST": 2,
|
||||
"new_timestamp": "00:01:07,000-00:01:24,000"
|
||||
}
|
||||
]
|
||||
# 合并子视频
|
||||
# combine_clip_videos(combined_video_path=combined_video_path, video_paths=video_paths, video_ost_list=video_ost_list, list_script=list_script)
|
||||
if subtitle_style:
|
||||
if font_path and 'font' not in subtitle_style:
|
||||
subtitle_style['font'] = font_path
|
||||
default_style.update(subtitle_style)
|
||||
|
||||
cfg = VideoClipParams()
|
||||
cfg.video_aspect = VideoAspect.portrait
|
||||
cfg.font_name = "STHeitiMedium.ttc"
|
||||
cfg.font_size = 60
|
||||
cfg.stroke_color = "#000000"
|
||||
cfg.stroke_width = 1.5
|
||||
cfg.text_fore_color = "#FFFFFF"
|
||||
cfg.text_background_color = "transparent"
|
||||
cfg.bgm_type = "random"
|
||||
cfg.bgm_file = ""
|
||||
cfg.bgm_volume = 1.0
|
||||
cfg.subtitle_enabled = True
|
||||
cfg.subtitle_position = "bottom"
|
||||
cfg.n_threads = 2
|
||||
cfg.video_volume = 1
|
||||
try:
|
||||
subs = pysrt.open(subtitle_path)
|
||||
logger.info(f"读取到 {len(subs)} 条字幕")
|
||||
|
||||
cfg.voice_volume = 1.0
|
||||
for index, sub in enumerate(subs):
|
||||
start_time = sub.start.ordinal / 1000
|
||||
end_time = sub.end.ordinal / 1000
|
||||
|
||||
video_path = "../../storage/tasks/123/combined.mp4"
|
||||
audio_path = "../../storage/tasks/123/final_audio.mp3"
|
||||
subtitle_path = "../../storage/tasks/123/subtitle.srt"
|
||||
output_file = "../../storage/tasks/123/final-123.mp4"
|
||||
try:
|
||||
# 检查字幕文本是否为空
|
||||
if not sub.text or sub.text.strip() == '':
|
||||
logger.info(f"警告:第 {index + 1} 条字幕内容为空,已跳过")
|
||||
continue
|
||||
|
||||
# 处理字幕文本:确保是字符串,并处理可能的列表情况
|
||||
if isinstance(sub.text, (list, tuple)):
|
||||
subtitle_text = ' '.join(str(item) for item in sub.text if item is not None)
|
||||
else:
|
||||
subtitle_text = str(sub.text)
|
||||
|
||||
subtitle_text = subtitle_text.strip()
|
||||
|
||||
if not subtitle_text:
|
||||
logger.info(f"警告:第 {index + 1} 条字幕处理后为空,已跳过")
|
||||
continue
|
||||
|
||||
# 计算位置
|
||||
if isinstance(default_style['position'], tuple):
|
||||
pos_x, pos_y = default_style['position']
|
||||
if isinstance(pos_y, float):
|
||||
y_pos = int(video.h * pos_y)
|
||||
position = (pos_x, y_pos)
|
||||
else:
|
||||
position = default_style['position']
|
||||
else:
|
||||
position = default_style['position']
|
||||
|
||||
# 创建基本的 TextClip
|
||||
text_clip = (TextClip(
|
||||
subtitle_text,
|
||||
font=default_style['font'],
|
||||
fontsize=default_style['fontsize'],
|
||||
color=default_style['color']
|
||||
)
|
||||
.set_position(position)
|
||||
.set_duration(end_time - start_time)
|
||||
.set_start(start_time))
|
||||
|
||||
subtitle_clips.append(text_clip)
|
||||
|
||||
except Exception as e:
|
||||
logger.info(f"警告:创建第 {index + 1} 条字幕时出错: {str(e)}")
|
||||
|
||||
logger.info(f"成功创建 {len(subtitle_clips)} 条字幕剪辑")
|
||||
except Exception as e:
|
||||
logger.info(f"警告:处理字幕文件时出错: {str(e)}")
|
||||
else:
|
||||
logger.info(f"提示:字幕文件不存在: {subtitle_path}")
|
||||
|
||||
# 合并音频
|
||||
audio_clips = []
|
||||
|
||||
# 添加原声(设置音量)
|
||||
if video.audio is not None:
|
||||
original_audio = video.audio.volumex(default_volume['original'])
|
||||
audio_clips.append(original_audio)
|
||||
|
||||
# 添加BGM(如果提供)
|
||||
if bgm_path:
|
||||
bgm = AudioFileClip(bgm_path)
|
||||
if bgm.duration < video.duration:
|
||||
bgm = loop_audio_clip(bgm, video.duration)
|
||||
else:
|
||||
bgm = bgm.subclip(0, video.duration)
|
||||
bgm = bgm.volumex(default_volume['bgm'])
|
||||
audio_clips.append(bgm)
|
||||
|
||||
# 添加解说音频(如果提供)
|
||||
if narration_path:
|
||||
narration = AudioFileClip(narration_path).volumex(default_volume['narration'])
|
||||
audio_clips.append(narration)
|
||||
|
||||
# 合成最终视频(包含字幕)
|
||||
if subtitle_clips:
|
||||
final_video = CompositeVideoClip([video] + subtitle_clips, size=video.size)
|
||||
else:
|
||||
logger.info("警告:没有字幕被添加到视频中")
|
||||
final_video = video
|
||||
|
||||
if audio_clips:
|
||||
final_audio = CompositeAudioClip(audio_clips)
|
||||
final_video = final_video.set_audio(final_audio)
|
||||
|
||||
# 导出视频
|
||||
logger.info("开始导出视频...") # 调试信息
|
||||
final_video.write_videofile(
|
||||
output_path,
|
||||
codec='libx264',
|
||||
audio_codec='aac',
|
||||
fps=video.fps
|
||||
)
|
||||
logger.info(f"视频已导出到: {output_path}") # 调试信息
|
||||
|
||||
# 清理资源
|
||||
video.close()
|
||||
for clip in subtitle_clips:
|
||||
clip.close()
|
||||
if bgm_path:
|
||||
bgm.close()
|
||||
if narration_path:
|
||||
narration.close()
|
||||
|
||||
generate_video_v2(video_path=video_path,
|
||||
audio_path=audio_path,
|
||||
subtitle_path=subtitle_path,
|
||||
output_file=output_file,
|
||||
params=cfg,
|
||||
list_script=list_script,
|
||||
)
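For comparison with the legacy generate_video_v2 call above, a hedged sketch of invoking the new generate_video_v3 entry point directly; the argument names follow its signature, while the paths and style values are placeholders.

generate_video_v3(
    video_path="../../storage/tasks/123/combined.mp4",
    subtitle_path="../../storage/tasks/123/subtitle.srt",
    bgm_path=None,                                        # no background music
    narration_path="../../storage/tasks/123/final_audio.mp3",
    output_path="../../storage/tasks/123/final-123.mp4",
    volume_config={'original': 0.8, 'bgm': 0.2, 'narration': 1.0},
    subtitle_style={'fontsize': 48, 'color': 'white', 'position': ('center', 0.85)},
    font_path=None,                                       # falls back to the built-in default ('Arial')
)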
|
||||
|
||||
@ -7,7 +7,6 @@ import asyncio
|
||||
from loguru import logger
|
||||
from typing import List
|
||||
from datetime import datetime
|
||||
from edge_tts.submaker import mktimestamp
|
||||
from xml.sax.saxutils import unescape
|
||||
from edge_tts import submaker, SubMaker
|
||||
from moviepy.video.tools import subtitles
|
||||
@ -1199,7 +1198,7 @@ def azure_tts_v2(text: str, voice_name: str, voice_file: str) -> [SubMaker, None
|
||||
except Exception as e:
|
||||
logger.error(f"failed, error: {str(e)}")
|
||||
if i < 2: # 如果不是最后一次重试,则等待1秒
|
||||
time.sleep(1)
|
||||
time.sleep(3)
|
||||
return None
|
||||
|
||||
|
||||
@ -1318,96 +1317,6 @@ def create_subtitle_from_multiple(text: str, sub_maker_list: List[SubMaker], lis
|
||||
traceback.print_exc()
|
||||
|
||||
|
||||
def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str):
|
||||
"""
|
||||
优化字幕文件
|
||||
1. 将字幕文件按照标点符号分割成多行
|
||||
2. 逐行匹配字幕文件中的文本
|
||||
3. 生成新的字幕文件
|
||||
"""
|
||||
|
||||
text = _format_text(text)
|
||||
|
||||
def formatter(idx: int, start_time: float, end_time: float, sub_text: str) -> str:
|
||||
"""
|
||||
1
|
||||
00:00:00,000 --> 00:00:02,360
|
||||
跑步是一项简单易行的运动
|
||||
"""
|
||||
start_t = mktimestamp(start_time).replace(".", ",")
|
||||
end_t = mktimestamp(end_time).replace(".", ",")
|
||||
return f"{idx}\n" f"{start_t} --> {end_t}\n" f"{sub_text}\n"
|
||||
|
||||
start_time = -1.0
|
||||
sub_items = []
|
||||
sub_index = 0
|
||||
|
||||
script_lines = utils.split_string_by_punctuations(text)
|
||||
|
||||
def match_line(_sub_line: str, _sub_index: int):
|
||||
if len(script_lines) <= _sub_index:
|
||||
return ""
|
||||
|
||||
_line = script_lines[_sub_index]
|
||||
if _sub_line == _line:
|
||||
return script_lines[_sub_index].strip()
|
||||
|
||||
_sub_line_ = re.sub(r"[^\w\s]", "", _sub_line)
|
||||
_line_ = re.sub(r"[^\w\s]", "", _line)
|
||||
if _sub_line_ == _line_:
|
||||
return _line_.strip()
|
||||
|
||||
_sub_line_ = re.sub(r"\W+", "", _sub_line)
|
||||
_line_ = re.sub(r"\W+", "", _line)
|
||||
if _sub_line_ == _line_:
|
||||
return _line.strip()
|
||||
|
||||
return ""
|
||||
|
||||
sub_line = ""
|
||||
|
||||
try:
|
||||
for _, (offset, sub) in enumerate(zip(sub_maker.offset, sub_maker.subs)):
|
||||
_start_time, end_time = offset
|
||||
if start_time < 0:
|
||||
start_time = _start_time
|
||||
|
||||
sub = unescape(sub)
|
||||
sub_line += sub
|
||||
sub_text = match_line(sub_line, sub_index)
|
||||
if sub_text:
|
||||
sub_index += 1
|
||||
line = formatter(
|
||||
idx=sub_index,
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
sub_text=sub_text,
|
||||
)
|
||||
sub_items.append(line)
|
||||
start_time = -1.0
|
||||
sub_line = ""
|
||||
|
||||
if len(sub_items) == len(script_lines):
|
||||
with open(subtitle_file, "w", encoding="utf-8") as file:
|
||||
file.write("\n".join(sub_items) + "\n")
|
||||
try:
|
||||
sbs = subtitles.file_to_subtitles(subtitle_file, encoding="utf-8")
|
||||
duration = max([tb for ((ta, tb), txt) in sbs])
|
||||
logger.info(
|
||||
f"completed, subtitle file created: {subtitle_file}, duration: {duration}"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"failed, error: {str(e)}")
|
||||
os.remove(subtitle_file)
|
||||
else:
|
||||
logger.warning(
|
||||
f"failed, sub_items len: {len(sub_items)}, script_lines len: {len(script_lines)}"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"failed, error: {str(e)}")
|
||||
|
||||
|
||||
def get_audio_duration(sub_maker: submaker.SubMaker):
|
||||
"""
|
||||
获取音频时长
|
||||
@ -1466,20 +1375,3 @@ def tts_multiple(task_id: str, list_script: list, voice_name: str, voice_rate: f
|
||||
logger.info(f"已生成音频文件: {audio_file}")
|
||||
|
||||
return audio_files, sub_maker_list
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
voice_name = "zh-CN-YunyangNeural"
|
||||
# voice_name = "af-ZA-AdriNeural"
|
||||
voice_name = parse_voice_name(voice_name)
|
||||
print(voice_name)
|
||||
|
||||
with open("../../resource/scripts/2024-1203-205442.json", 'r', encoding='utf-8') as f:
|
||||
data = json.load(f)
|
||||
|
||||
audio_files, sub_maker_list = tts_multiple(task_id="12312312", list_script=data, voice_name=voice_name, voice_rate=1, voice_pitch=1)
|
||||
|
||||
full_text = " ".join([item['narration'] for item in data if not item['OST']])
|
||||
subtitle_file = os.path.join(utils.task_dir("12312312"), "subtitle_multiple.srt")
|
||||
create_subtitle_from_multiple(full_text, sub_maker_list, data, subtitle_file)
|
||||
print(f"生成的音频文件列表: {audio_files}")
|
||||
|
||||
@ -117,6 +117,47 @@ def song_dir(sub_dir: str = ""):
|
||||
return d
|
||||
|
||||
|
||||
def get_bgm_file(bgm_type: str = "random", bgm_file: str = ""):
|
||||
"""
|
||||
获取背景音乐文件路径
|
||||
Args:
|
||||
bgm_type: 背景音乐类型,可选值: random(随机), ""(无背景音乐)
|
||||
bgm_file: 指定的背景音乐文件路径
|
||||
|
||||
Returns:
|
||||
str: 背景音乐文件路径
|
||||
"""
|
||||
import glob
|
||||
import random
|
||||
if not bgm_type:
|
||||
return ""
|
||||
|
||||
if bgm_file and os.path.exists(bgm_file):
|
||||
return bgm_file
|
||||
|
||||
if bgm_type == "random":
|
||||
song_dir_path = song_dir()
|
||||
|
||||
# 检查目录是否存在
|
||||
if not os.path.exists(song_dir_path):
|
||||
logger.warning(f"背景音乐目录不存在: {song_dir_path}")
|
||||
return ""
|
||||
|
||||
# 支持 mp3 和 flac 格式
|
||||
mp3_files = glob.glob(os.path.join(song_dir_path, "*.mp3"))
|
||||
flac_files = glob.glob(os.path.join(song_dir_path, "*.flac"))
|
||||
files = mp3_files + flac_files
|
||||
|
||||
# 检查是否找到音乐文件
|
||||
if not files:
|
||||
logger.warning(f"在目录 {song_dir_path} 中没有找到 MP3 或 FLAC 文件")
|
||||
return ""
|
||||
|
||||
return random.choice(files)
|
||||
|
||||
return ""
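A quick, illustrative use of get_bgm_file; the custom path here is hypothetical.

bgm = get_bgm_file(bgm_type="random")                                     # random .mp3/.flac from the songs directory
custom = get_bgm_file(bgm_type="custom", bgm_file="/path/to/my_bgm.mp3")  # an existing explicit file wins
silent = get_bgm_file(bgm_type="")                                        # empty type disables background music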
|
||||
|
||||
|
||||
def public_dir(sub_dir: str = ""):
|
||||
d = resource_dir(f"public")
|
||||
if sub_dir:
|
||||
@ -339,7 +380,7 @@ def time_to_seconds(time_str: str) -> float:
|
||||
|
||||
# 分割时间部分
|
||||
parts = time_part.split(':')
|
||||
|
||||
|
||||
if len(parts) == 3: # HH:MM:SS
|
||||
h, m, s = map(float, parts)
|
||||
seconds = h * 3600 + m * 60 + s
|
||||
@ -350,7 +391,7 @@ def time_to_seconds(time_str: str) -> float:
|
||||
seconds = float(parts[0])
|
||||
|
||||
return seconds + ms
|
||||
|
||||
|
||||
except (ValueError, IndexError) as e:
|
||||
logger.error(f"时间格式转换错误 {time_str}: {str(e)}")
|
||||
return 0.0
|
||||
@ -373,16 +414,16 @@ def calculate_total_duration(scenes):
|
||||
float: 总时长(秒)
|
||||
"""
|
||||
total_seconds = 0
|
||||
|
||||
|
||||
for scene in scenes:
|
||||
start, end = scene['timestamp'].split('-')
|
||||
# 使用 time_to_seconds 函数处理更精确的时间格式
|
||||
start_seconds = time_to_seconds(start)
|
||||
end_seconds = time_to_seconds(end)
|
||||
|
||||
|
||||
duration = end_seconds - start_seconds
|
||||
total_seconds += duration
|
||||
|
||||
|
||||
return total_seconds
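A small worked example of the duration arithmetic above, assuming the millisecond part of each timestamp is parsed earlier in time_to_seconds (that part of the function is outside this hunk); the scene data is illustrative.

scenes = [
    {"timestamp": "00:00:10,000-00:00:43,000"},   # 43.0   - 10.0  = 33.0 s
    {"timestamp": "00:01:07,920-00:01:25,719"},   # 85.719 - 67.92 = 17.799 s
]
print(calculate_total_duration(scenes))           # -> 50.799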
|
||||
|
||||
|
||||
@ -502,7 +543,7 @@ def clear_keyframes_cache(video_path: str = None):
|
||||
keyframes_dir = os.path.join(temp_dir(), "keyframes")
|
||||
if not os.path.exists(keyframes_dir):
|
||||
return
|
||||
|
||||
|
||||
if video_path:
|
||||
# 理指定视频的缓存
|
||||
video_hash = md5(video_path + str(os.path.getmtime(video_path)))
|
||||
@ -516,7 +557,7 @@ def clear_keyframes_cache(video_path: str = None):
|
||||
import shutil
|
||||
shutil.rmtree(keyframes_dir)
|
||||
logger.info("已清理所有关键帧缓存")
|
||||
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"清理关键帧缓存失败: {e}")
|
||||
|
||||
@ -527,15 +568,16 @@ def init_resources():
|
||||
# 创建字体目录
|
||||
font_dir = os.path.join(root_dir(), "resource", "fonts")
|
||||
os.makedirs(font_dir, exist_ok=True)
|
||||
|
||||
|
||||
# 检查字体文件
|
||||
font_files = [
|
||||
("SourceHanSansCN-Regular.otf", "https://github.com/adobe-fonts/source-han-sans/raw/release/OTF/SimplifiedChinese/SourceHanSansSC-Regular.otf"),
|
||||
("SourceHanSansCN-Regular.otf",
|
||||
"https://github.com/adobe-fonts/source-han-sans/raw/release/OTF/SimplifiedChinese/SourceHanSansSC-Regular.otf"),
|
||||
("simhei.ttf", "C:/Windows/Fonts/simhei.ttf"), # Windows 黑体
|
||||
("simkai.ttf", "C:/Windows/Fonts/simkai.ttf"), # Windows 楷体
|
||||
("simsun.ttc", "C:/Windows/Fonts/simsun.ttc"), # Windows 宋体
|
||||
]
|
||||
|
||||
|
||||
# 优先使用系统字体
|
||||
system_font_found = False
|
||||
for font_name, source in font_files:
|
||||
@ -547,16 +589,17 @@ def init_resources():
|
||||
logger.info(f"已复制系统字体: {font_name}")
|
||||
system_font_found = True
|
||||
break
|
||||
|
||||
|
||||
# 如果没有找到系统字体,则下载思源黑体
|
||||
if not system_font_found:
|
||||
source_han_path = os.path.join(font_dir, "SourceHanSansCN-Regular.otf")
|
||||
if not os.path.exists(source_han_path):
|
||||
download_font(font_files[0][1], source_han_path)
|
||||
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"初始化资源文件失败: {e}")
|
||||
|
||||
|
||||
def download_font(url: str, font_path: str):
|
||||
"""下载字体文件"""
|
||||
try:
|
||||
@ -564,16 +607,17 @@ def download_font(url: str, font_path: str):
|
||||
import requests
|
||||
response = requests.get(url)
|
||||
response.raise_for_status()
|
||||
|
||||
|
||||
with open(font_path, 'wb') as f:
|
||||
f.write(response.content)
|
||||
|
||||
|
||||
logger.info(f"字体文件下载成功: {font_path}")
|
||||
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"下载字体文件失败: {e}")
|
||||
raise
|
||||
|
||||
|
||||
def init_imagemagick():
|
||||
"""初始化 ImageMagick 配置"""
|
||||
try:
|
||||
@ -583,10 +627,10 @@ def init_imagemagick():
|
||||
if result.returncode != 0:
|
||||
logger.error("ImageMagick 未安装或配置不正确")
|
||||
return False
|
||||
|
||||
|
||||
# 设置 IMAGEMAGICK_BINARY 环境变量
|
||||
os.environ['IMAGEMAGICK_BINARY'] = 'magick'
|
||||
|
||||
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(f"初始化 ImageMagick 失败: {str(e)}")
|
||||
|
||||
@ -11,8 +11,13 @@
|
||||
vision_gemini_api_key = ""
|
||||
vision_gemini_model_name = "gemini-1.5-flash"
|
||||
|
||||
########## Vision Qwen API Key
|
||||
vision_qwenvl_api_key = ""
|
||||
vision_qwenvl_model_name = "qwen-vl-max-latest"
|
||||
vision_qwenvl_base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"
|
||||
|
||||
########### Vision NarratoAPI Key
|
||||
narrato_api_key = ""
|
||||
narrato_api_key = "0N0iEjU77aTqPW4d9YHCmTW2mPrfgWjDmaWAz1lTVTM"
|
||||
narrato_api_url = "https://narratoinsight.scsmtech.cn/api/v1"
|
||||
narrato_vision_model = "gemini-1.5-flash"
|
||||
narrato_vision_key = ""
|
||||
@ -32,9 +37,7 @@
|
||||
########## OpenAI API Key
|
||||
# Get your API key at https://platform.openai.com/api-keys
|
||||
text_openai_api_key = ""
|
||||
# No need to set it unless you want to use your own proxy
|
||||
text_openai_base_url = ""
|
||||
# Check your available models at https://platform.openai.com/account/limits
|
||||
text_openai_base_url = "https://api.openai.com/v1"
|
||||
text_openai_model_name = "gpt-4o-mini"
|
||||
|
||||
########## Moonshot API Key
|
||||
@ -66,7 +69,8 @@
|
||||
# https://tongyi.aliyun.com/qianwen/
|
||||
# https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction
|
||||
text_qwen_api_key = ""
|
||||
text_qwen_model_name = "qwen-max"
|
||||
text_qwen_model_name = "qwen-plus-1127"
|
||||
text_qwen_base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"
|
||||
|
||||
########## DeepSeek API Key
|
||||
# Visit https://platform.deepseek.com/api_keys to get your API key
|
||||
|
||||
@@ -1,7 +1,6 @@
requests~=2.31.0
moviepy==2.0.0.dev2
faster-whisper~=1.0.1
edge_tts~=6.1.15
uvicorn~=0.27.1
fastapi~=0.115.4
tomli~=2.0.1
@@ -35,3 +34,5 @@ tiktoken==0.8.0
yt-dlp==2024.11.18
pysrt==1.1.2
httpx==0.27.2
transformers==4.47.0
edge-tts==6.1.19
@ -6,23 +6,25 @@ from app.services import voice
|
||||
from app.utils import utils
|
||||
from webui.utils.cache import get_songs_cache
|
||||
|
||||
|
||||
def render_audio_panel(tr):
|
||||
"""渲染音频设置面板"""
|
||||
with st.container(border=True):
|
||||
st.write(tr("Audio Settings"))
|
||||
|
||||
|
||||
# 渲染TTS设置
|
||||
render_tts_settings(tr)
|
||||
|
||||
|
||||
# 渲染背景音乐设置
|
||||
render_bgm_settings(tr)
|
||||
|
||||
|
||||
def render_tts_settings(tr):
|
||||
"""渲染TTS(文本转语音)设置"""
|
||||
# 获取支持的语音列表
|
||||
support_locales = ["zh-CN"]
|
||||
voices = voice.get_all_azure_voices(filter_locals=support_locales)
|
||||
|
||||
|
||||
# 创建友好的显示名称
|
||||
friendly_names = {
|
||||
v: v.replace("Female", tr("Female"))
|
||||
@ -30,11 +32,11 @@ def render_tts_settings(tr):
|
||||
.replace("Neural", "")
|
||||
for v in voices
|
||||
}
|
||||
|
||||
|
||||
# 获取保存的语音设置
|
||||
saved_voice_name = config.ui.get("voice_name", "")
|
||||
saved_voice_name_index = 0
|
||||
|
||||
|
||||
if saved_voice_name in friendly_names:
|
||||
saved_voice_name_index = list(friendly_names.keys()).index(saved_voice_name)
|
||||
else:
|
||||
@ -56,7 +58,7 @@ def render_tts_settings(tr):
|
||||
voice_name = list(friendly_names.keys())[
|
||||
list(friendly_names.values()).index(selected_friendly_name)
|
||||
]
|
||||
|
||||
|
||||
# 保存设置
|
||||
config.ui["voice_name"] = voice_name
|
||||
|
||||
@ -70,34 +72,40 @@ def render_tts_settings(tr):
|
||||
# 试听按钮
|
||||
render_voice_preview(tr, voice_name)
|
||||
|
||||
|
||||
def render_azure_v2_settings(tr):
|
||||
"""渲染Azure V2语音设置"""
|
||||
saved_azure_speech_region = config.azure.get("speech_region", "")
|
||||
saved_azure_speech_key = config.azure.get("speech_key", "")
|
||||
|
||||
|
||||
azure_speech_region = st.text_input(
|
||||
tr("Speech Region"),
|
||||
tr("Speech Region"),
|
||||
value=saved_azure_speech_region
|
||||
)
|
||||
azure_speech_key = st.text_input(
|
||||
tr("Speech Key"),
|
||||
value=saved_azure_speech_key,
|
||||
tr("Speech Key"),
|
||||
value=saved_azure_speech_key,
|
||||
type="password"
|
||||
)
|
||||
|
||||
|
||||
config.azure["speech_region"] = azure_speech_region
|
||||
config.azure["speech_key"] = azure_speech_key
|
||||
|
||||
|
||||
def render_voice_parameters(tr):
|
||||
"""渲染语音参数设置"""
|
||||
# 音量
|
||||
-    voice_volume = st.selectbox(
+    voice_volume = st.slider(
        tr("Speech Volume"),
-        options=[0.6, 0.8, 1.0, 1.2, 1.5, 2.0, 3.0, 4.0, 5.0],
-        index=2,
+        min_value=0.0,
+        max_value=2.0,
+        value=1.0,
+        step=0.1,
+        help=tr("Adjust the volume of the original audio")
    )
    st.session_state['voice_volume'] = voice_volume
|
||||
|
||||
# 语速
|
||||
voice_rate = st.selectbox(
|
||||
tr("Speech Rate"),
|
||||
@ -114,6 +122,7 @@ def render_voice_parameters(tr):
|
||||
)
|
||||
st.session_state['voice_pitch'] = voice_pitch
|
||||
|
||||
|
||||
def render_voice_preview(tr, voice_name):
|
||||
"""渲染语音试听功能"""
|
||||
if st.button(tr("Play Voice")):
|
||||
@ -122,11 +131,11 @@ def render_voice_preview(tr, voice_name):
|
||||
play_content = st.session_state.get('video_script', '')
|
||||
if not play_content:
|
||||
play_content = tr("Voice Example")
|
||||
|
||||
|
||||
with st.spinner(tr("Synthesizing Voice")):
|
||||
temp_dir = utils.storage_dir("temp", create=True)
|
||||
audio_file = os.path.join(temp_dir, f"tmp-voice-{str(uuid4())}.mp3")
|
||||
|
||||
|
||||
sub_maker = voice.tts(
|
||||
text=play_content,
|
||||
voice_name=voice_name,
|
||||
@ -134,7 +143,7 @@ def render_voice_preview(tr, voice_name):
|
||||
voice_pitch=st.session_state.get('voice_pitch', 1.0),
|
||||
voice_file=audio_file,
|
||||
)
|
||||
|
||||
|
||||
# 如果语音文件生成失败,使用默认内容重试
|
||||
if not sub_maker:
|
||||
play_content = "This is a example voice. if you hear this, the voice synthesis failed with the original content."
|
||||
@ -151,6 +160,7 @@ def render_voice_preview(tr, voice_name):
|
||||
if os.path.exists(audio_file):
|
||||
os.remove(audio_file)
|
||||
|
||||
|
||||
def render_bgm_settings(tr):
|
||||
"""渲染背景音乐设置"""
|
||||
# 背景音乐选项
|
||||
@ -159,14 +169,14 @@ def render_bgm_settings(tr):
|
||||
(tr("Random Background Music"), "random"),
|
||||
(tr("Custom Background Music"), "custom"),
|
||||
]
|
||||
|
||||
|
||||
selected_index = st.selectbox(
|
||||
tr("Background Music"),
|
||||
index=1,
|
||||
options=range(len(bgm_options)),
|
||||
format_func=lambda x: bgm_options[x][0],
|
||||
)
|
||||
|
||||
|
||||
# 获取选择的背景音乐类型
|
||||
bgm_type = bgm_options[selected_index][1]
|
||||
st.session_state['bgm_type'] = bgm_type
|
||||
@@ -176,15 +186,19 @@
        custom_bgm_file = st.text_input(tr("Custom Background Music File"))
        if custom_bgm_file and os.path.exists(custom_bgm_file):
            st.session_state['bgm_file'] = custom_bgm_file

    # 背景音乐音量
-    bgm_volume = st.selectbox(
+    bgm_volume = st.slider(
        tr("Background Music Volume"),
-        options=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
-        index=2,
+        min_value=0.0,
+        max_value=2.0,
+        value=1.0,
+        step=0.1,
+        help=tr("Adjust the volume of the original audio")
    )
    st.session_state['bgm_volume'] = bgm_volume
|
||||
|
||||
def get_audio_params():
|
||||
"""获取音频参数"""
|
||||
return {
|
||||
@@ -195,4 +209,4 @@
        'bgm_type': st.session_state.get('bgm_type', 'random'),
        'bgm_file': st.session_state.get('bgm_file', ''),
        'bgm_volume': st.session_state.get('bgm_volume', 0.2),
    }
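To make the data flow concrete, a hypothetical sketch of how the returned dict can be copied onto VideoClipParams before a task is started; only the keys visible above are shown.

audio_params = get_audio_params()
params = VideoClipParams()
params.bgm_type = audio_params['bgm_type']
params.bgm_file = audio_params['bgm_file']
params.bgm_volume = audio_params['bgm_volume']
# The voice-related keys (name, volume, rate, pitch) are applied the same way.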
@ -149,6 +149,7 @@ def test_vision_model_connection(api_key, base_url, model_name, provider, tr):
    else:
        return False, f"{tr('Unsupported provider')}: {provider}"


def render_vision_llm_settings(tr):
    """Render the vision (video analysis) model settings"""
    st.subheader(tr("Vision Model Settings"))
@ -196,7 +197,7 @@ def render_vision_llm_settings(tr):
    elif vision_provider == 'qwenvl':
        st_vision_base_url = st.text_input(
            tr("Vision Base URL"),
            value=vision_base_url or "https://dashscope.aliyuncs.com/compatible-mode/v1",
            value=vision_base_url,
            help=tr("Default: https://dashscope.aliyuncs.com/compatible-mode/v1")
        )
        st_vision_model_name = st.text_input(
@ -2,12 +2,15 @@ import os
import glob
import json
import time
import traceback
import streamlit as st
from loguru import logger

from app.config import config
from app.models.schema import VideoClipParams
from app.utils import utils, check_script
from webui.tools.generate_script_docu import generate_script_docu
from webui.tools.generate_script_short import generate_script_short


def render_script_panel(tr):
@ -34,6 +37,7 @@ def render_script_file(tr, params):
    script_list = [
        (tr("None"), ""),
        (tr("Auto Generate"), "auto"),
        (tr("Short Generate"), "short"),
        (tr("Upload Script"), "upload_script")  # New: upload-script option
    ]

@ -216,7 +220,9 @@ def render_script_buttons(tr, params):
    script_path = st.session_state.get('video_clip_json_path', '')
    if script_path == "auto":
        button_name = tr("Generate Video Script")
    elif script_path:
    elif script_path == "short":
        button_name = tr("Generate Short Video Script")
    elif script_path.endswith("json"):
        button_name = tr("Load Video Script")
    else:
        button_name = tr("Please Select Script File")
@ -224,6 +230,8 @@
    if st.button(button_name, key="script_action", disabled=not script_path):
        if script_path == "auto":
            generate_script_docu(tr, params)
        elif script_path == "short":
            generate_script_short(tr, params)
        else:
            load_script(tr, script_path)

@ -275,6 +283,7 @@ def load_script(tr, script_path):
        st.success(tr("Script loaded successfully"))
        st.rerun()
    except Exception as e:
        logger.error(f"加载脚本文件时发生错误\n{traceback.format_exc()}")
        st.error(f"{tr('Failed to load script')}: {str(e)}")


@ -332,3 +341,14 @@ def crop_video(tr, params):
    time.sleep(2)
    progress_bar.empty()
    status_text.empty()


def get_script_params():
    """Collect script parameters"""
    return {
        'video_language': st.session_state.get('video_language', ''),
        'video_clip_json_path': st.session_state.get('video_clip_json_path', ''),
        'video_origin_path': st.session_state.get('video_origin_path', ''),
        'video_name': st.session_state.get('video_name', ''),
        'video_plot': st.session_state.get('video_plot', '')
    }
@ -1,6 +1,7 @@
import streamlit as st
from app.models.schema import VideoClipParams, VideoAspect


def render_video_panel(tr):
    """Render the video configuration panel"""
    with st.container(border=True):
@ -8,6 +9,7 @@ def render_video_panel(tr):
        params = VideoClipParams()
        render_video_config(tr, params)


def render_video_config(tr, params):
    """Render the video configuration"""
    # Video aspect ratio
@ -39,9 +41,20 @@ def render_video_config(tr, params):
    )
    st.session_state['video_quality'] = video_qualities[quality_index][1]

    # Original audio volume
    params.original_volume = st.slider(
        tr("Original Volume"),
        min_value=0.0,
        max_value=2.0,
        value=1.0,
        step=0.1,
        help=tr("Adjust the volume of the original audio")
    )
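
As an aside, a rough sketch of how original_volume, tts_volume and bgm_volume could act as per-track gains when tracks are summed (a pure-Python illustration under assumed sample lists, not the project's actual mixer):

def mix_tracks(original, tts, bgm, original_volume=1.0, tts_volume=1.0, bgm_volume=0.2):
    """Apply per-track gains and sum sample lists of possibly different lengths."""
    length = max(len(original), len(tts), len(bgm))

    def pad(track):
        # Zero-pad the shorter tracks so all three can be summed sample by sample
        return list(track) + [0.0] * (length - len(track))

    return [
        o * original_volume + t * tts_volume + b * bgm_volume
        for o, t, b in zip(pad(original), pad(tts), pad(bgm))
    ]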

def get_video_params():
    """Collect video parameters"""
    return {
        'video_aspect': st.session_state.get('video_aspect', VideoAspect.portrait.value),
        'video_quality': st.session_state.get('video_quality', '1080p')
    }
    }
@ -2,13 +2,12 @@
  "Language": "简体中文",
  "Translation": {
    "Video Script Configuration": "**视频脚本配置**",
    "Generate Video Script": "生成视频脚本",
    "Generate Video Script": "AI生成画面解说脚本",
    "Video Subject": "视频主题(给定一个关键词,:red[AI自动生成]视频文案)",
    "Script Language": "生成视频脚本的语言(一般情况AI会自动根据你输入的主题语言输出)",
    "Script Files": "脚本文件",
    "Generate Video Script and Keywords": "点击使用AI根据**主题**生成 【视频文案】 和 【视频关键词】",
    "Auto Detect": "自动检测",
    "Auto Generate": "自动生成",
    "Video Theme": "视频主题",
    "Generation Prompt": "自定义提示词",
    "Save Script": "保存脚本",
@ -188,6 +187,11 @@
    "Transcription Failed": "转录失败",
    "Mergeable Files": "可合并文件数",
    "Subtitle Content": "字幕内容",
    "Merge Result Preview": "合并结果预览"
    "Merge Result Preview": "合并结果预览",
    "Short Generate": "短剧混剪 (高燃剪辑, 当前只支持 gpt-4o 模型)",
    "Generate Short Video Script": "AI生成短剧混剪脚本",
    "Adjust the volume of the original audio": "调整原始音频的音量",
    "Original Volume": "视频音量",
    "Auto Generate": "纪录片解说 (画面解说)"
  }
}
@ -1,7 +1,11 @@
import os
import requests
import streamlit as st
from loguru import logger
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

from app.config import config
from app.utils import gemini_analyzer, qwenvl_analyzer


@ -31,17 +35,6 @@ def create_vision_analyzer(provider, api_key, model, base_url):
        raise ValueError(f"不支持的视觉分析提供商: {provider}")


def get_script_params():
    """Collect script parameters"""
    return {
        'video_language': st.session_state.get('video_language', ''),
        'video_clip_json_path': st.session_state.get('video_clip_json_path', ''),
        'video_origin_path': st.session_state.get('video_origin_path', ''),
        'video_name': st.session_state.get('video_name', ''),
        'video_plot': st.session_state.get('video_plot', '')
    }


def get_batch_timestamps(batch_files, prev_batch_files=None):
    """
    Parse the timestamp range of a batch of files, with millisecond precision
@ -139,3 +132,32 @@ def get_batch_files(keyframe_files, result, batch_size=5):
    batch_start = result['batch_index'] * batch_size
    batch_end = min(batch_start + batch_size, len(keyframe_files))
    return keyframe_files[batch_start:batch_end]
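
A quick worked example of the slicing logic above (the file names are hypothetical):

# With batch_size=5 and result['batch_index'] == 2, the slice starts at index 10
# and stops at min(15, len(keyframe_files)).
keyframes = [f"keyframe_{i:03d}.jpg" for i in range(23)]
batch = get_batch_files(keyframes, {"batch_index": 2}, batch_size=5)
# batch == ['keyframe_010.jpg', 'keyframe_011.jpg', 'keyframe_012.jpg',
#           'keyframe_013.jpg', 'keyframe_014.jpg']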


def chekc_video_config(video_params):
    """
    Check the video analysis configuration
    """
    headers = {
        'accept': 'application/json',
        'Content-Type': 'application/json'
    }
    session = requests.Session()
    retry_strategy = Retry(
        total=3,
        backoff_factor=1,
        status_forcelist=[500, 502, 503, 504]
    )
    adapter = HTTPAdapter(max_retries=retry_strategy)
    session.mount("https://", adapter)
    try:
        session.post(
            f"{config.app.get('narrato_api_url')}/video/config",
            headers=headers,
            json=video_params,
            timeout=30,
            verify=True
        )
        return True
    except Exception:
        return False
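
A possible call site, assuming the report is best-effort (a sketch, not code from this commit):

# Hypothetical usage: a False return is only logged and never blocks script generation.
if not chekc_video_config(api_params):
    logger.warning("Failed to report the video config; continuing anyway")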
@ -13,7 +13,7 @@ from urllib3.util.retry import Retry
from app.config import config
from app.utils.script_generator import ScriptProcessor
from app.utils import utils, video_processor, video_processor_v2, qwenvl_analyzer
from webui.tools.base import create_vision_analyzer, get_batch_files, get_batch_timestamps
from webui.tools.base import create_vision_analyzer, get_batch_files, get_batch_timestamps, chekc_video_config


def generate_script_docu(tr, params):
@ -117,8 +117,7 @@ def generate_script_docu(tr, params):
    elif vision_llm_provider == 'qwenvl':
        vision_api_key = st.session_state.get('vision_qwenvl_api_key')
        vision_model = st.session_state.get('vision_qwenvl_model_name', 'qwen-vl-max-latest')
        vision_base_url = st.session_state.get('vision_qwenvl_base_url',
                                               'https://dashscope.aliyuncs.com/compatible-mode/v1')
        vision_base_url = st.session_state.get('vision_qwenvl_base_url')
    else:
        raise ValueError(f"不支持的视觉分析提供商: {vision_llm_provider}")

@ -228,28 +227,7 @@ def generate_script_docu(tr, params):
        "text_model_name": text_model,
        "text_base_url": text_base_url or ""
    }
    headers = {
        'accept': 'application/json',
        'Content-Type': 'application/json'
    }
    session = requests.Session()
    retry_strategy = Retry(
        total=3,
        backoff_factor=1,
        status_forcelist=[500, 502, 503, 504]
    )
    adapter = HTTPAdapter(max_retries=retry_strategy)
    session.mount("https://", adapter)
    try:
        response = session.post(
            f"{config.app.get('narrato_api_url')}/video/config",
            headers=headers,
            json=api_params,
            timeout=30,
            verify=True
        )
    except Exception as e:
        pass
    chekc_video_config(api_params)
    custom_prompt = st.session_state.get('custom_prompt', '')
    processor = ScriptProcessor(
        model_name=text_model,
webui/tools/generate_script_short.py (new file, 85 lines)
@ -0,0 +1,85 @@
import os
import json
import time
import asyncio
import traceback
import requests
import streamlit as st
from loguru import logger

from app.config import config
from webui.tools.base import chekc_video_config
from app.services.SDP.generate_script_short import generate_script


def generate_script_short(tr, params):
    """
    Generate a short-drama remix video script
    """
    progress_bar = st.progress(0)
    status_text = st.empty()

    def update_progress(progress: float, message: str = ""):
        progress_bar.progress(progress)
        if message:
            status_text.text(f"{progress}% - {message}")
        else:
            status_text.text(f"进度: {progress}%")

    try:
        with st.spinner("正在生成脚本..."):
            text_provider = config.app.get('text_llm_provider', 'gemini').lower()
            text_api_key = config.app.get(f'text_{text_provider}_api_key')
            text_model = config.app.get(f'text_{text_provider}_model_name')
            text_base_url = config.app.get(f'text_{text_provider}_base_url')
            vision_api_key = st.session_state.get(f'vision_{text_provider}_api_key', "")
            vision_model = st.session_state.get(f'vision_{text_provider}_model_name', "")
            vision_base_url = st.session_state.get(f'vision_{text_provider}_base_url', "")
            narrato_api_key = config.app.get('narrato_api_key')

            update_progress(20, "开始准备生成脚本")

            srt_path = params.video_origin_path.replace(".mp4", ".srt").replace("videos", "srt").replace("video", "subtitle")
            if not os.path.exists(srt_path):
                logger.error(f"{srt_path} 文件不存在,请检查或重新转录")
                st.error(f"{srt_path} 文件不存在,请检查或重新转录")
                st.stop()

            api_params = {
                "vision_api_key": vision_api_key,
                "vision_model_name": vision_model,
                "vision_base_url": vision_base_url or "",
                "text_api_key": text_api_key,
                "text_model_name": text_model,
                "text_base_url": text_base_url or ""
            }
            chekc_video_config(api_params)
            script = generate_script(
                srt_path=srt_path,
                output_path="resource/scripts/merged_subtitle.json",
                api_key=text_api_key,
                model_name=text_model,
                base_url=text_base_url,
                narrato_api_key=narrato_api_key,
                bert_path="app/models/bert/",
            )

            if script is None:
                st.error("生成脚本失败,请检查日志")
                st.stop()
            logger.info(f"脚本生成完成 {json.dumps(script, ensure_ascii=False, indent=4)}")
            if isinstance(script, list):
                st.session_state['video_clip_json'] = script
            elif isinstance(script, str):
                st.session_state['video_clip_json'] = json.loads(script)
            update_progress(80, "脚本生成完成")

        time.sleep(0.1)
        progress_bar.progress(100)
        status_text.text("脚本生成完成!")
        st.success("视频脚本生成成功!")

    except Exception as err:
        progress_bar.progress(100)
        st.error(f"生成过程中发生错误: {str(err)}")
        logger.exception(f"生成脚本时发生错误\n{traceback.format_exc()}")