feat(webui): 大改动标记1

- 重构音频设置面板,增加语音音量、背景音乐等设置
- 添加背景音乐文件选择功能
- 优化字幕设置,支持自定义字体和样式
- 调整视频生成流程,支持新音频设置
- 更新文档示例,反映新功能
This commit is contained in:
linyq 2024-12-10 18:33:44 +08:00
parent 67bee9d567
commit c065800072
32 changed files with 623 additions and 803 deletions

3
.gitignore vendored
View File

@ -31,4 +31,5 @@ resource/fonts/*.ttc
resource/fonts/*.ttf
resource/fonts/*.otf
resource/srt/*.srt
app/models/faster-whisper-large-v2/*
app/models/faster-whisper-large-v2/*
app/models/bert/*

View File

@ -345,29 +345,29 @@ class VideoClipParams(BaseModel):
# video_concat_mode: Optional[VideoConcatMode] = VideoConcatMode.random.value
voice_name: Optional[str] = Field(default="zh-CN-YunjianNeural", description="语音名称")
voice_volume: Optional[float] = Field(default=1.0, description="语音音量")
voice_volume: Optional[float] = Field(default=1.0, description="解说语音音量")
voice_rate: Optional[float] = Field(default=1.0, description="语速")
voice_pitch: Optional[float] = Field(default=1.0, description="语调")
bgm_name: Optional[str] = Field(default="random", description="背景音乐名称")
bgm_type: Optional[str] = Field(default="random", description="背景音乐类型")
bgm_file: Optional[str] = Field(default="", description="背景音乐文件")
bgm_volume: Optional[float] = Field(default=0.2, description="背景音乐音量")
subtitle_enabled: Optional[bool] = Field(default=True, description="是否启用字幕")
subtitle_position: Optional[str] = Field(default="bottom", description="字幕位置") # top, bottom, center
font_name: Optional[str] = Field(default="STHeitiMedium.ttc", description="字体名称")
text_fore_color: Optional[str] = Field(default="#FFFFFF", description="文字前景色")
text_background_color: Optional[str] = Field(default="transparent", description="文字背景色")
subtitle_enabled: bool = True
font_name: str = "SimHei" # 默认使用黑体
font_size: int = 36
text_fore_color: str = "white" # 文本前景色
text_back_color: Optional[str] = None # 文本背景色
stroke_color: str = "black" # 描边颜色
stroke_width: float = 1.5 # 描边宽度
subtitle_position: str = "bottom" # top, bottom, center, custom
font_size: int = Field(default=60, description="文字大小")
stroke_color: Optional[str] = Field(default="#000000", description="文字描边颜色")
stroke_width: float = Field(default=1.5, description="文字描边宽度")
custom_position: float = Field(default=70.0, description="自定义位置")
n_threads: Optional[int] = Field(default=16, description="解说语音音量") # 线程数,有助于提升视频处理速度
tts_volume: Optional[float] = Field(default=1.0, description="解说语音音量(后处理)")
original_volume: Optional[float] = Field(default=1.0, description="视频原声音量")
bgm_volume: Optional[float] = Field(default=0.6, description="背景音乐音量")
n_threads: Optional[int] = 8 # 线程数,有助于提升视频处理速度
tts_volume: float = 1.0 # TTS音频音量
video_volume: float = 0.1 # 视频原声音量
class VideoTranscriptionRequest(BaseModel):
video_name: str
@ -376,5 +376,6 @@ class VideoTranscriptionRequest(BaseModel):
class Config:
arbitrary_types_allowed = True
class VideoTranscriptionResponse(BaseModel):
transcription: str

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
app/services/SDP/utils/utils.so Executable file

Binary file not shown.

View File

@ -157,55 +157,6 @@ def get_video_materials(task_id, params, video_terms, audio_duration):
return downloaded_videos
def generate_final_videos(
    task_id, params, downloaded_videos, audio_file, subtitle_path
):
    """
    Combine downloaded clips and render the final video(s) for a task.

    For each of ``params.video_count`` outputs, the downloaded clips are first
    concatenated into ``combined-<i>.mp4``, then the final ``final-<i>.mp4`` is
    rendered with audio and subtitles. Task progress is advanced from 50 to 100
    across both phases.

    Args:
        task_id: task identifier, used to locate the task directory.
        params: video parameters (count, aspect, concat mode, clip duration,
            thread count).
        downloaded_videos: paths of the source video clips to combine.
        audio_file: path of the narration audio file.
        subtitle_path: path of the subtitle file (may be empty).

    Returns:
        tuple: (list of final video paths, list of combined video paths).
    """
    final_video_paths = []
    combined_video_paths = []
    # When rendering more than one video the concat mode is forced to random;
    # the caller-selected mode is honored only for a single output.
    video_concat_mode = (
        params.video_concat_mode if params.video_count == 1 else VideoConcatMode.random
    )
    _progress = 50  # earlier pipeline stages account for the first 50%
    for i in range(params.video_count):
        index = i + 1
        combined_video_path = path.join(
            utils.task_dir(task_id), f"combined-{index}.mp4"
        )
        logger.info(f"\n\n## combining video: {index} => {combined_video_path}")
        video.combine_videos(
            combined_video_path=combined_video_path,
            video_paths=downloaded_videos,
            audio_file=audio_file,
            video_aspect=params.video_aspect,
            video_concat_mode=video_concat_mode,
            max_clip_duration=params.video_clip_duration,
            threads=params.n_threads,
        )
        # Each output contributes two equal progress steps: combine + render.
        _progress += 50 / params.video_count / 2
        sm.state.update_task(task_id, progress=_progress)
        final_video_path = path.join(utils.task_dir(task_id), f"final-{index}.mp4")
        logger.info(f"\n\n## generating video: {index} => {final_video_path}")
        video.generate_video(
            video_path=combined_video_path,
            audio_path=audio_file,
            subtitle_path=subtitle_path,
            output_file=final_video_path,
            params=params,
        )
        _progress += 50 / params.video_count / 2
        sm.state.update_task(task_id, progress=_progress)
        final_video_paths.append(final_video_path)
        combined_video_paths.append(combined_video_path)
    return final_video_paths, combined_video_paths
def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: dict):
"""后台任务(自动剪辑视频进行剪辑)"""
logger.info(f"\n\n## 开始任务: {task_id}")
@ -253,7 +204,12 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di
segment for segment in list_script
if segment['OST'] in [0, 2]
]
# logger.debug(f"tts_segments: {tts_segments}")
logger.debug(f"需要生成TTS的片段数: {len(tts_segments)}")
# 初始化音频文件路径
audio_files = []
final_audio = ""
if tts_segments:
audio_files, sub_maker_list = voice.tts_multiple(
task_id=task_id,
@ -267,36 +223,54 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di
sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
logger.error("TTS转换音频失败, 可能是网络不可用! 如果您在中国, 请使用VPN.")
return
if audio_files:
logger.info(f"合并音频文件: {audio_files}")
try:
# 传入OST信息以便正确处理音频
final_audio = audio_merger.merge_audio_files(
task_id=task_id,
audio_files=audio_files,
total_duration=total_duration,
list_script=list_script # 传入完整脚本以便处理OST
)
logger.info("音频文件合并成功")
except Exception as e:
logger.error(f"合并音频文件失败: {str(e)}")
final_audio = ""
else:
audio_files = []
logger.info(f"合并音频文件:\n{audio_files}")
# 传入OST信息以便正确处理音频
final_audio = audio_merger.merge_audio_files(
task_id=task_id,
audio_files=audio_files,
total_duration=total_duration,
list_script=list_script # 传入完整脚本以便处理OST
)
# 如果没有需要生成TTS的片段创建一个空白音频文件
# 这样可以确保后续的音频处理能正确进行
logger.info("没有需要生成TTS的片段将保留原声和背景音乐")
final_audio = path.join(utils.task_dir(task_id), "empty.mp3")
try:
from moviepy.editor import AudioClip
# 创建一个与视频等长的空白音频
empty_audio = AudioClip(make_frame=lambda t: 0, duration=total_duration)
empty_audio.write_audiofile(final_audio, fps=44100)
logger.info(f"已创建空白音频文件: {final_audio}")
except Exception as e:
logger.error(f"创建空白音频文件失败: {str(e)}")
final_audio = ""
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=30)
# 只为OST=0或2的片段生成字幕
subtitle_path = ""
if params.subtitle_enabled:
subtitle_path = path.join(utils.task_dir(task_id), f"subtitle.srt")
subtitle_provider = config.app.get("subtitle_provider", "").strip().lower()
logger.info(f"\n\n## 3. 生成字幕、提供程序是: {subtitle_provider}")
subtitle.create(
audio_file=final_audio,
subtitle_file=subtitle_path,
)
if audio_files:
subtitle_path = path.join(utils.task_dir(task_id), f"subtitle.srt")
subtitle_provider = config.app.get("subtitle_provider", "").strip().lower()
logger.info(f"\n\n## 3. 生成字幕、提供程序是: {subtitle_provider}")
subtitle_lines = subtitle.file_to_subtitles(subtitle_path)
if not subtitle_lines:
logger.warning(f"字幕文件无效: {subtitle_path}")
subtitle_path = ""
subtitle.create(
audio_file=final_audio,
subtitle_file=subtitle_path,
)
subtitle_lines = subtitle.file_to_subtitles(subtitle_path)
if not subtitle_lines:
logger.warning(f"字幕文件无效: {subtitle_path}")
subtitle_path = ""
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=40)
@ -335,14 +309,44 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di
final_video_path = path.join(utils.task_dir(task_id), f"final-{index}.mp4")
logger.info(f"\n\n## 6. 最后合成: {index} => {final_video_path}")
# 传入OST信息以便正确处理音频和视频
video.generate_video_v2(
# 获取背景音乐
bgm_path = None
if params.bgm_type or params.bgm_file:
try:
bgm_path = utils.get_bgm_file(bgm_type=params.bgm_type, bgm_file=params.bgm_file)
if bgm_path:
logger.info(f"使用背景音乐: {bgm_path}")
except Exception as e:
logger.error(f"获取背景音乐失败: {str(e)}")
# 示例:自定义字幕样式
subtitle_style = {
'fontsize': params.font_size, # 字体大小
'color': params.text_fore_color, # 字体颜色
'stroke_color': params.stroke_color, # 描边颜色
'stroke_width': params.stroke_width, # 描边宽度, 范围0-10
'bg_color': params.text_back_color, # 半透明黑色背景
'position': ('center', 0.2), # 距离顶部60%的位置
'method': 'caption' # 渲染方法
}
# 示例:自定义音量配置
volume_config = {
'original': params.original_volume, # 原声音量80%
'bgm': params.bgm_volume, # BGM音量20%
'narration': params.tts_volume # 解说音量100%
}
font_path = utils.font_dir(params.font_name)
video.generate_video_v3(
video_path=combined_video_path,
audio_path=final_audio,
subtitle_path=subtitle_path,
output_file=final_video_path,
params=params,
list_script=list_script # 传入完整脚本以便处理OST
bgm_path=bgm_path,
narration_path=final_audio,
output_path=final_video_path,
volume_config=volume_config, # 添加音量配置
subtitle_style=subtitle_style,
font_path=font_path
)
_progress += 50 / 2
@ -361,6 +365,40 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di
return kwargs
def validate_params(video_path, audio_path, output_file, params):
    """
    Validate the inputs for video generation.

    Args:
        video_path: path of the input video file (required).
        audio_path: path of the audio file; may be an empty string to skip audio.
        output_file: path of the output file; its parent directory is created
            when missing.
        params: video parameter object; must be truthy.

    Raises:
        FileNotFoundError: when the video (or a provided audio) file does not exist.
        ValueError: when a required argument is empty/missing.
    """
    if not video_path:
        raise ValueError("视频路径不能为空")

    if not os.path.exists(video_path):
        raise FileNotFoundError(f"视频文件不存在: {video_path}")

    # Audio is optional: only verify the file when a path was supplied.
    if audio_path and not os.path.exists(audio_path):
        raise FileNotFoundError(f"音频文件不存在: {audio_path}")

    if not output_file:
        raise ValueError("输出文件路径不能为空")

    # Ensure the output directory exists. dirname() is "" for a bare file
    # name (current directory) — os.makedirs("") would raise, so skip that
    # case. exist_ok avoids a race when another worker creates it first.
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    if not params:
        raise ValueError("视频参数不能为空")
if __name__ == "__main__":
# task_id = "test123"
# subclip_path_videos = {'00:41-01:58': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-00_41-01_58.mp4',

View File

@ -1,186 +1,22 @@
import re
import os
import glob
import random
from typing import List
from typing import Union
import traceback
import pysrt
from typing import Optional
from typing import List
from loguru import logger
from moviepy.editor import *
from moviepy.video.tools.subtitles import SubtitlesClip
from PIL import ImageFont
from contextlib import contextmanager
from app.models import const
from app.models.schema import MaterialInfo, VideoAspect, VideoConcatMode, VideoParams, VideoClipParams
from app.utils import utils
from moviepy.editor import (
VideoFileClip,
AudioFileClip,
TextClip,
CompositeVideoClip,
CompositeAudioClip
)
def get_bgm_file(bgm_type: str = "random", bgm_file: str = ""):
    """
    Resolve the background-music file to use.

    Args:
        bgm_type: background-music type; "random" picks a random song,
            "" disables background music.
        bgm_file: explicit path to a music file; returned when it exists.

    Returns:
        str: path of the music file to use, or "" when none applies.
    """
    if not bgm_type:
        return ""

    # An explicit, existing file always wins over random selection.
    if bgm_file and os.path.exists(bgm_file):
        return bgm_file

    if bgm_type != "random":
        return ""

    music_dir = utils.song_dir()
    if not os.path.exists(music_dir):
        logger.warning(f"背景音乐目录不存在: {music_dir}")
        return ""

    # Both MP3 and FLAC files are eligible candidates.
    candidates = [
        f
        for pattern in ("*.mp3", "*.flac")
        for f in glob.glob(os.path.join(music_dir, pattern))
    ]
    if not candidates:
        logger.warning(f"在目录 {music_dir} 中没有找到 MP3 或 FLAC 文件")
        return ""
    return random.choice(candidates)
def combine_videos(
    combined_video_path: str,
    video_paths: List[str],
    audio_file: str,
    video_aspect: VideoAspect = VideoAspect.portrait,
    video_concat_mode: VideoConcatMode = VideoConcatMode.random,
    max_clip_duration: int = 5,
    threads: int = 2,
) -> str:
    """
    Merge multiple video clips into one video matching the audio duration.

    Args:
        combined_video_path: output path of the merged video.
        video_paths: paths of the source videos to merge.
        audio_file: audio file whose duration the merged video must cover.
        video_aspect: target aspect ratio.
        video_concat_mode: clip concatenation mode (random / sequential).
        max_clip_duration: maximum duration of each sub-clip, in seconds.
        threads: number of encoding threads.

    Returns:
        str: the merged video path (same as ``combined_video_path``).
    """
    audio_clip = AudioFileClip(audio_file)
    audio_duration = audio_clip.duration
    logger.info(f"音频时长: {audio_duration}")
    # Required duration of each clip.
    # NOTE(review): the computed per-clip duration is immediately overwritten
    # by max_clip_duration, so the division has no effect — confirm intent.
    req_dur = audio_duration / len(video_paths)
    req_dur = max_clip_duration
    logger.info(f"每个片段最大时长: {req_dur}")
    output_dir = os.path.dirname(combined_video_path)
    aspect = VideoAspect(video_aspect)
    video_width, video_height = aspect.to_resolution()
    clips = []
    video_duration = 0
    raw_clips = []
    # Split every source video (audio stripped) into sub-clips of at most
    # max_clip_duration seconds. In sequential mode only the first sub-clip
    # of each source is kept.
    for video_path in video_paths:
        clip = VideoFileClip(video_path).without_audio()
        clip_duration = clip.duration
        start_time = 0
        while start_time < clip_duration:
            end_time = min(start_time + max_clip_duration, clip_duration)
            split_clip = clip.subclip(start_time, end_time)
            raw_clips.append(split_clip)
            start_time = end_time
            if video_concat_mode.value == VideoConcatMode.sequential.value:
                break
    # Randomize the clip order.
    if video_concat_mode.value == VideoConcatMode.random.value:
        random.shuffle(raw_clips)
    # Keep appending clips (cycling through raw_clips) until the accumulated
    # video duration reaches the audio duration.
    # NOTE(review): if raw_clips is empty this loop never terminates — confirm
    # callers always pass at least one usable video.
    while video_duration < audio_duration:
        for clip in raw_clips:
            # Trim the clip if it is longer than the remaining audio time.
            if (audio_duration - video_duration) < clip.duration:
                clip = clip.subclip(0, (audio_duration - video_duration))
            # Shorten the clip only when the requested duration (req_dur) is
            # smaller than the actual clip duration.
            elif req_dur < clip.duration:
                clip = clip.subclip(0, req_dur)
            clip = clip.set_fps(30)
            # Not all videos are same size, so we need to resize them
            clip_w, clip_h = clip.size
            if clip_w != video_width or clip_h != video_height:
                clip_ratio = clip.w / clip.h
                video_ratio = video_width / video_height
                if clip_ratio == video_ratio:
                    # Same aspect ratio: a plain resize suffices.
                    clip = clip.resize((video_width, video_height))
                else:
                    # Different aspect ratio: scale proportionally and
                    # letterbox on a black background.
                    if clip_ratio > video_ratio:
                        # Scale to the target width.
                        scale_factor = video_width / clip_w
                    else:
                        # Scale to the target height.
                        scale_factor = video_height / clip_h
                    new_width = int(clip_w * scale_factor)
                    new_height = int(clip_h * scale_factor)
                    clip_resized = clip.resize(newsize=(new_width, new_height))
                    background = ColorClip(
                        size=(video_width, video_height), color=(0, 0, 0)
                    )
                    clip = CompositeVideoClip(
                        [
                            background.set_duration(clip.duration),
                            clip_resized.set_position("center"),
                        ]
                    )
                logger.info(
                    f"调整视频尺寸为 {video_width} x {video_height}, 片段尺寸: {clip_w} x {clip_h}"
                )
            if clip.duration > max_clip_duration:
                clip = clip.subclip(0, max_clip_duration)
            clips.append(clip)
            video_duration += clip.duration
    video_clip = concatenate_videoclips(clips)
    video_clip = video_clip.set_fps(30)
    logger.info("writing")
    video_clip.write_videofile(
        filename=combined_video_path,
        threads=threads,
        logger=None,
        temp_audiofile_path=output_dir,
        audio_codec="aac",
        fps=30,
    )
    video_clip.close()
    logger.success("completed")
    return combined_video_path
from app.models.schema import VideoAspect
def wrap_text(text, max_width, font, fontsize=60):
@ -269,259 +105,6 @@ def manage_clip(clip):
del clip
def generate_video_v2(
    video_path: str,
    audio_path: str,
    subtitle_path: str,
    output_file: str,
    list_script: list,
    params: Union[VideoParams, VideoClipParams],
    progress_callback=None,
):
    """
    Merge all materials (video, audio, subtitles) into the final video.

    Args:
        video_path: path of the input video.
        audio_path: path of the single audio file to mix in.
        subtitle_path: path of the subtitle file (may not exist).
        output_file: path of the rendered output file.
        list_script: full script list (accepted but not read in this function).
        params: video parameters (aspect, fonts, volumes, threads, ...).
        progress_callback: optional callback receiving a 0-100 progress value.

    Raises:
        FileNotFoundError: when an input file is missing (re-raised).
        Exception: any other rendering failure (logged and re-raised).
    """
    total_steps = 4
    current_step = 0

    def update_progress(step_name):
        # Advance one step and report the overall percentage to the caller.
        nonlocal current_step
        current_step += 1
        if progress_callback:
            progress_callback(int(current_step * 100 / total_steps))
        logger.info(f"完成步骤: {step_name}")

    try:
        validate_params(video_path, audio_path, output_file, params)
        with manage_clip(VideoFileClip(video_path)) as video_clip:
            aspect = VideoAspect(params.video_aspect)
            video_width, video_height = aspect.to_resolution()
            logger.info(f"开始,视频尺寸: {video_width} x {video_height}")
            logger.info(f" ① 视频: {video_path}")
            logger.info(f" ② 音频: {audio_path}")
            logger.info(f" ③ 字幕: {subtitle_path}")
            logger.info(f" ④ 输出: {output_file}")
            output_dir = os.path.dirname(output_file)
            update_progress("初始化完成")
            # Font setup: resolve the subtitle font path (only when subtitles
            # are enabled); normalize path separators for ImageMagick on Windows.
            font_path = ""
            if params.subtitle_enabled:
                if not params.font_name:
                    params.font_name = "STHeitiMedium.ttc"
                font_path = os.path.join(utils.font_dir(), params.font_name)
                if os.name == "nt":
                    font_path = font_path.replace("\\", "/")
                logger.info(f"使用字体: {font_path}")

            def create_text_clip(subtitle_item):
                # Build one positioned TextClip from a ((start, end), text) item.
                phrase = subtitle_item[1]
                max_width = video_width * 0.9
                wrapped_txt, txt_height = wrap_text(
                    phrase, max_width=max_width, font=font_path, fontsize=params.font_size
                )
                _clip = TextClip(
                    wrapped_txt,
                    font=font_path,
                    fontsize=params.font_size,
                    color=params.text_fore_color,
                    bg_color=params.text_background_color,
                    stroke_color=params.stroke_color,
                    stroke_width=params.stroke_width,
                    print_cmd=False,
                )
                duration = subtitle_item[0][1] - subtitle_item[0][0]
                _clip = _clip.set_start(subtitle_item[0][0])
                _clip = _clip.set_end(subtitle_item[0][1])
                _clip = _clip.set_duration(duration)
                if params.subtitle_position == "bottom":
                    _clip = _clip.set_position(("center", video_height * 0.95 - _clip.h))
                elif params.subtitle_position == "top":
                    _clip = _clip.set_position(("center", video_height * 0.05))
                elif params.subtitle_position == "custom":
                    # custom_position is a percentage of the usable height,
                    # clamped so the text stays fully on screen.
                    margin = 10
                    max_y = video_height - _clip.h - margin
                    min_y = margin
                    custom_y = (video_height - _clip.h) * (params.custom_position / 100)
                    custom_y = max(min_y, min(custom_y, max_y))
                    _clip = _clip.set_position(("center", custom_y))
                else:  # center
                    _clip = _clip.set_position(("center", "center"))
                return _clip

            update_progress("字体设置完成")
            # Audio: mix the video's own track with the narration (and BGM).
            original_audio = video_clip.audio
            video_duration = video_clip.duration
            new_audio = AudioFileClip(audio_path)
            final_audio = process_audio_tracks(original_audio, new_audio, params, video_duration)
            update_progress("音频处理完成")
            # Subtitles: overlay only when the subtitle file exists.
            if subtitle_path and os.path.exists(subtitle_path):
                video_clip = process_subtitles(subtitle_path, video_clip, video_duration, create_text_clip)
            update_progress("字幕处理完成")
            # Attach the mixed audio and export (slow step).
            logger.info("开始导出视频 (此步骤耗时较长请耐心等待)")
            video_clip = video_clip.set_audio(final_audio)
            video_clip.write_videofile(
                output_file,
                audio_codec="aac",
                temp_audiofile=os.path.join(output_dir, "temp-audio.m4a"),
                threads=params.n_threads,
                logger=None,
                fps=30,
            )
    except FileNotFoundError as e:
        logger.error(f"文件不存在: {str(e)}")
        raise
    except Exception as e:
        logger.error(f"视频生成失败: {str(e)}")
        raise
    finally:
        logger.success("完成")
def process_audio_tracks(original_audio, new_audio, params, video_duration):
    """
    Mix all audio tracks (original sound, narration, background music).

    Args:
        original_audio: the video's own audio track, or None.
        new_audio: the narration audio clip.
        params: video parameters (volumes, BGM selection).
        video_duration: duration of the video in seconds.

    Returns:
        CompositeAudioClip: the mixed audio.
    """
    narration = new_audio.volumex(params.voice_volume)
    tracks = [] if original_audio is None else [original_audio]
    tracks.append(narration)

    # Background music is best-effort: a failure must not abort the render.
    bgm_file = get_bgm_file(bgm_type=params.bgm_type, bgm_file=params.bgm_file)
    if bgm_file:
        try:
            bgm_clip = AudioFileClip(bgm_file).volumex(params.bgm_volume).audio_fadeout(3)
            tracks.append(afx.audio_loop(bgm_clip, duration=video_duration))
        except Exception as e:
            logger.error(f"添加背景音乐失败: {str(e)}")

    return CompositeAudioClip(tracks) if tracks else narration
def process_subtitles(subtitle_path, video_clip, video_duration, create_text_clip):
    """
    Overlay subtitles onto a video clip.

    Args:
        subtitle_path: path of the subtitle file.
        video_clip: the base video clip.
        video_duration: duration of the video in seconds.
        create_text_clip: callback building a TextClip from one subtitle item.

    Returns:
        CompositeVideoClip: the video with subtitles layered on top, or the
        unchanged clip when no subtitle file is available.
    """
    # Nothing to do without a readable subtitle file.
    if not subtitle_path or not os.path.exists(subtitle_path):
        return video_clip

    srt_clip = SubtitlesClip(subtitles=subtitle_path, encoding="utf-8")
    rendered = []
    for entry in srt_clip.subtitles:
        text_clip = create_text_clip(subtitle_item=entry)
        # Clamp each subtitle to [0, video_duration]; drop subtitles that
        # would start after the video ends.
        begin = max(text_clip.start, 0)
        if begin >= video_duration:
            continue
        finish = min(text_clip.end, video_duration)
        rendered.append(text_clip.set_start(begin).set_end(finish))

    logger.info(f"处理了 {len(rendered)} 段字幕")
    return CompositeVideoClip([video_clip, *rendered])
def preprocess_video(materials: List[MaterialInfo], clip_duration=4):
    """
    Preprocess raw materials: convert still images into short zooming videos.

    Args:
        materials: list of material descriptors; each ``material.url`` is a
            local file path and may be rewritten to the generated video path.
        clip_duration: duration of the generated clip, in seconds.

    Returns:
        List[MaterialInfo]: the (mutated) material list.
    """
    for material in materials:
        if not material.url:
            continue
        ext = utils.parse_extension(material.url)
        # Probe the file: if it is not a video, treat it as an image.
        try:
            clip = VideoFileClip(material.url)
        except Exception:
            clip = ImageClip(material.url)
        width = clip.size[0]
        height = clip.size[1]
        # Skip materials that are too small to look acceptable.
        if width < 480 or height < 480:
            logger.warning(f"video is too small, width: {width}, height: {height}")
            continue
        if ext in const.FILE_TYPE_IMAGES:
            logger.info(f"processing image: {material.url}")
            # Build an image clip of clip_duration seconds, centered.
            clip = (
                ImageClip(material.url)
                .set_duration(clip_duration)
                .set_position("center")
            )
            # Add a gradual zoom via resize(): the scale factor grows with t
            # (1.0 = 100%), so the image slowly zooms in over the clip.
            zoom_clip = clip.resize(
                lambda t: 1 + (clip_duration * 0.03) * (t / clip.duration)
            )
            # Wrap in a composite clip (useful if more elements are added later).
            final_clip = CompositeVideoClip([zoom_clip])
            # Render the image as an .mp4 next to the original file and point
            # the material at the generated video.
            video_file = f"{material.url}.mp4"
            final_clip.write_videofile(video_file, fps=30, logger=None)
            final_clip.close()
            del final_clip
            material.url = video_file
            logger.success(f"completed: {video_file}")
    return materials
def combine_clip_videos(combined_video_path: str,
video_paths: List[str],
video_ost_list: List[int],
@ -640,101 +223,220 @@ def resize_video_with_padding(clip, target_width: int, target_height: int):
])
def validate_params(video_path, audio_path, output_file, params):
def loop_audio_clip(audio_clip: AudioFileClip, target_duration: float) -> AudioFileClip:
"""
验证输入参数
Args:
video_path: 视频文件路径
audio_path: 音频文件路径
output_file: 输出文件路径
params: 视频参数
循环音频片段直到达到目标时长
Raises:
FileNotFoundError: 文件不存在时抛出
ValueError: 参数无效时抛出
参数:
audio_clip: 原始音频片段
target_duration: 目标时长
返回:
循环后的音频片段
"""
# 计算需要循环的次数
loops_needed = int(target_duration / audio_clip.duration) + 1
# 创建足够长的音频
extended_audio = audio_clip
for _ in range(loops_needed - 1):
extended_audio = CompositeAudioClip([
extended_audio,
audio_clip.set_start(extended_audio.duration)
])
# 裁剪到目标时长
return extended_audio.subclip(0, target_duration)
def generate_video_v3(
video_path: str,
subtitle_path: Optional[str] = None,
bgm_path: Optional[str] = None,
narration_path: Optional[str] = None,
output_path: str = "output.mp4",
# 音量相关参数
volume_config: dict = None,
# 字幕相关参数
subtitle_style: dict = None,
font_path: Optional[str] = None
) -> None:
"""
合并视频素材包括视频字幕BGM和解说音频
参数:
video_path: 原视频文件路径
subtitle_path: SRT字幕文件路径可选
bgm_path: 背景音乐文件路径可选
narration_path: 解说音频文件路径可选
output_path: 输出文件路径
volume_config: 音量配置字典可包含以下键
- original: 原声音量0-1默认1.0
- bgm: BGM音量0-1默认0.3
- narration: 解说音量0-1默认1.0
subtitle_style: 字幕样式配置字典可包含以下键
- font: 字体名称
- fontsize: 字体大小
- color: 字体颜色
- stroke_color: 描边颜色
- stroke_width: 描边宽度
- bg_color: 背景色
- position: 位置支持 'top'/'center'/'bottom' (x,y) 坐标
- method: 文字渲染方法
font_path: 字体文件路径.ttf/.otf 等格式
"""
# 检查视频文件是否存在
if not os.path.exists(video_path):
raise FileNotFoundError(f"视频文件不存在: {video_path}")
if not os.path.exists(audio_path):
raise FileNotFoundError(f"音频文件不存在: {audio_path}")
# 设置默认音量配置
default_volume = {
'original': 1.0, # 原声音量
'bgm': 0.3, # BGM音量
'narration': 1.0 # 解说音量
}
output_dir = os.path.dirname(output_file)
if not os.path.exists(output_dir):
raise FileNotFoundError(f"输出目录不存在: {output_dir}")
# 更新音量配置
if volume_config:
default_volume.update(volume_config)
if not hasattr(params, 'video_aspect'):
raise ValueError("params 缺少必要参数 video_aspect")
# 加载视频
video = VideoFileClip(video_path)
subtitle_clips = []
# 处理字幕(如果提供)
if subtitle_path:
if os.path.exists(subtitle_path):
# 检查字体文件
if font_path and not os.path.exists(font_path):
logger.info(f"警告:字体文件不存在: {font_path},将使用系统默认字体")
font_path = 'Arial'
if __name__ == "__main__":
combined_video_path = "../../storage/tasks/123/combined.mp4"
# 设置默认字幕样式
default_style = {
'font': font_path if font_path else 'Arial',
'fontsize': 24,
'color': 'white',
'stroke_color': 'black',
'stroke_width': 1,
'bg_color': None,
'position': ('center', 'bottom'),
'method': 'label'
}
video_paths = ['../../storage/temp/clip_video/0b545e689a182a91af2163c7c0ca7ca3/vid-00-00-10_000-00-00-43_039.mp4',
'../../storage/temp/clip_video/0b545e689a182a91af2163c7c0ca7ca3/vid-00-00-45_439-00-01-01_600.mp4',
'../../storage/temp/clip_video/0b545e689a182a91af2163c7c0ca7ca3/vid-00-01-07_920-00-01-25_719.mp4',
'../../storage/temp/clip_video/0b545e689a182a91af2163c7c0ca7ca3/vid-00-01-36_959-00-01-53_719.mp4']
video_ost_list = [2, 2, 2, 2]
list_script = [
{
"timestamp": "00:10-00:43",
"picture": "好的,以下是视频画面的客观描述:\n\n视频显示一个男人在一个树木繁茂的地区靠近一个泥土斜坡他穿着一件深色T恤、卡其色长裤和登山靴。他背着一个军绿色背包里面似乎装有头和其他工具。\n\n第一个镜头显示该男子从远处走近斜坡,背对着镜头。下一个镜头特写显示了的背包,一个镐头从背包中伸出来。下一个镜头显示该男子用镐头敲打斜坡。下一个镜头是该男子脚上的特写镜头,他穿着登山靴,正站在泥土斜坡上。最后一个镜显示该男子在斜坡上,仔细地拨开树根和泥土。周围的环境是树木繁茂的,阳光透过树叶照射下来。土壤是浅棕色的,斜坡上有许多树根和植被。",
"narration": "(接上文)好吧,今天我们的男主角,背着一个看似随时要发射军绿色背包,竟然化身“泥土探险家”,在斜坡上挥舞着镐头!他这是准备挖宝还是给树根做个“美容”?阳光洒下来,简直是自然界的聚光灯,仿佛在说:“快来看看,这位勇士要挑战泥土极限!”我只能默默想,如果树根能说话,它们一定会喊:“别打我,我还有家人!”这就是生活,总有些搞笑的瞬间等着我们去发现!",
"OST": 2,
"new_timestamp": "00:00:00,000-00:00:33,000"
},
{
"timestamp": "00:45-01:01",
"picture": "好的以下是视频画面的客观描述:\n\n视频显示了一个人在森林里挖掘。\n\n第一个镜头是地面特写显示出松<EFBFBD><EFBFBD>的泥土、碎石和落叶。光线照在部分区域。\n\n第二个镜头中,一模糊不清的蹲一个树根旁挖掘,一个橄榄绿色的背包放在地上。树根缠绕着常春藤。\n\n第三个镜头显示该人在一个更开阔的区域挖掘,那里有一些树根,以及部分倒的树干。他起来像是在挖掘一个较大的坑。\n\n第四个镜头是特写镜头,显示该人用工具清理土坑的墙壁。\n\n第五个镜头是土坑内部的特写镜头,可以看到土质的纹理,有一些小树根和它植被的残留物。",
"narration": "现在,这位勇敢的挖掘者就像个“现代版的土豆农夫”,在林里开辟新天地。的目标是什么?挖一个宝藏还块“树根披萨”?小心哦,别让树根追着你喊:“不要挖我,我也是有故事的!”",
"OST": 2,
"new_timestamp": "00:00:33,000-00:00:49,000"
},
{
"timestamp": "01:07-01:25",
"picture": "好,以下是视频画面的客观描述:\n\n画面1特写镜头显示出一丛带有水珠的深绿色灌木叶片。叶片呈椭圆形边缘光滑。背景是树根和泥土。\n\n画面2一个留着胡子的男人正在一个森林中土坑里挖掘。他穿着黑色T恤和卡其色裤子跪在地用具挖掘泥土。周围环绕着树木、树根和灌木。一个倒下的树干横跨土坑上方。\n\n画面3同一个男人坐在他刚才挖的坑的边缘看着前方。他的表情似乎略带沉思。背景与画面2相同。\n\n画面4一个广角镜头显示出他挖出的坑。这是一个不规则形状的土坑在树木繁茂的斜坡上。土壤呈深棕色可见树根。\n\n画面5同一个男人跪在地上用一把小斧头砍一根木头。他穿着与前几个画面相同的衣服。地面上覆盖着落叶。周围是树木和灌木。",
"narration": "“哎呀,这片灌木叶子滴水如雨,感觉像是大自然的洗发水广告!但我这位‘挖宝达人’似乎更适合拍个‘森林里的单身狗’真人秀。等会儿,我要给树根唱首歌,听说它们爱音乐!”",
"OST": 2,
"new_timestamp": "00:00:49,000-00:01:07,000"
},
{
"timestamp": "01:36-01:53",
"picture": "好的,以下是视频画面内容的客观描述:\n\n视频包含三个镜头:\n\n**镜头一:**个小型、浅水池塘,位于树林中。池塘的水看起来浑浊,呈绿褐色。池塘周围遍布泥土和落叶。多根树枝和树干横跨池塘,部分浸没在水中。周围的植被茂密主要是深色树木和灌木。\n\n**镜头二:**距拍摄树深处,阳光透过树叶洒落在植被上。镜头中可见粗大的树干、树枝和各种绿叶植物。部分树枝似乎被砍断,切口可见。\n\n**镜头三:**近距离特写镜头,聚焦在树枝和绿叶上。叶片呈圆形,颜色为鲜绿色,有些叶片上有缺损。树枝颜色较深,呈现深褐色。背景是模糊的树林。\n",
"narration": "“好吧,看来我们的‘挖宝达人’终于找到了一‘宝藏’——一个色泽如同绿豆汤的池塘!我敢打赌,这里不仅是小鱼儿的游乐场更是树枝们的‘水疗中心’!下次来这里,我得带上浮潜装备!”",
"OST": 2,
"new_timestamp": "00:01:07,000-00:01:24,000"
}
]
# 合并子视频
# combine_clip_videos(combined_video_path=combined_video_path, video_paths=video_paths, video_ost_list=video_ost_list, list_script=list_script)
if subtitle_style:
if font_path and 'font' not in subtitle_style:
subtitle_style['font'] = font_path
default_style.update(subtitle_style)
cfg = VideoClipParams()
cfg.video_aspect = VideoAspect.portrait
cfg.font_name = "STHeitiMedium.ttc"
cfg.font_size = 60
cfg.stroke_color = "#000000"
cfg.stroke_width = 1.5
cfg.text_fore_color = "#FFFFFF"
cfg.text_background_color = "transparent"
cfg.bgm_type = "random"
cfg.bgm_file = ""
cfg.bgm_volume = 1.0
cfg.subtitle_enabled = True
cfg.subtitle_position = "bottom"
cfg.n_threads = 2
cfg.video_volume = 1
try:
subs = pysrt.open(subtitle_path)
logger.info(f"读取到 {len(subs)} 条字幕")
cfg.voice_volume = 1.0
for index, sub in enumerate(subs):
start_time = sub.start.ordinal / 1000
end_time = sub.end.ordinal / 1000
video_path = "../../storage/tasks/123/combined.mp4"
audio_path = "../../storage/tasks/123/final_audio.mp3"
subtitle_path = "../../storage/tasks/123/subtitle.srt"
output_file = "../../storage/tasks/123/final-123.mp4"
try:
# 检查字幕文本是否为空
if not sub.text or sub.text.strip() == '':
logger.info(f"警告:第 {index + 1} 条字幕内容为空,已跳过")
continue
# 处理字幕文本:确保是字符串,并处理可能的列表情况
if isinstance(sub.text, (list, tuple)):
subtitle_text = ' '.join(str(item) for item in sub.text if item is not None)
else:
subtitle_text = str(sub.text)
subtitle_text = subtitle_text.strip()
if not subtitle_text:
logger.info(f"警告:第 {index + 1} 条字幕处理后为空,已跳过")
continue
# 计算位置
if isinstance(default_style['position'], tuple):
pos_x, pos_y = default_style['position']
if isinstance(pos_y, float):
y_pos = int(video.h * pos_y)
position = (pos_x, y_pos)
else:
position = default_style['position']
else:
position = default_style['position']
# 创建基本的 TextClip
text_clip = (TextClip(
subtitle_text,
font=default_style['font'],
fontsize=default_style['fontsize'],
color=default_style['color']
)
.set_position(position)
.set_duration(end_time - start_time)
.set_start(start_time))
subtitle_clips.append(text_clip)
except Exception as e:
logger.info(f"警告:创建第 {index + 1} 条字幕时出错: {str(e)}")
logger.info(f"成功创建 {len(subtitle_clips)} 条字幕剪辑")
except Exception as e:
logger.info(f"警告:处理字幕文件时出错: {str(e)}")
else:
logger.info(f"提示:字幕文件不存在: {subtitle_path}")
# 合并音频
audio_clips = []
# 添加原声(设置音量)
if video.audio is not None:
original_audio = video.audio.volumex(default_volume['original'])
audio_clips.append(original_audio)
# 添加BGM如果提供
if bgm_path:
bgm = AudioFileClip(bgm_path)
if bgm.duration < video.duration:
bgm = loop_audio_clip(bgm, video.duration)
else:
bgm = bgm.subclip(0, video.duration)
bgm = bgm.volumex(default_volume['bgm'])
audio_clips.append(bgm)
# 添加解说音频(如果提供)
if narration_path:
narration = AudioFileClip(narration_path).volumex(default_volume['narration'])
audio_clips.append(narration)
# 合成最终视频(包含字幕)
if subtitle_clips:
final_video = CompositeVideoClip([video] + subtitle_clips, size=video.size)
else:
logger.info("警告:没有字幕被添加到视频中")
final_video = video
if audio_clips:
final_audio = CompositeAudioClip(audio_clips)
final_video = final_video.set_audio(final_audio)
# 导出视频
logger.info("开始导出视频...") # 调试信息
final_video.write_videofile(
output_path,
codec='libx264',
audio_codec='aac',
fps=video.fps
)
logger.info(f"视频已导出到: {output_path}") # 调试信息
# 清理资源
video.close()
for clip in subtitle_clips:
clip.close()
if bgm_path:
bgm.close()
if narration_path:
narration.close()
generate_video_v2(video_path=video_path,
audio_path=audio_path,
subtitle_path=subtitle_path,
output_file=output_file,
params=cfg,
list_script=list_script,
)

View File

@ -7,7 +7,6 @@ import asyncio
from loguru import logger
from typing import List
from datetime import datetime
from edge_tts.submaker import mktimestamp
from xml.sax.saxutils import unescape
from edge_tts import submaker, SubMaker
from moviepy.video.tools import subtitles
@ -1199,7 +1198,7 @@ def azure_tts_v2(text: str, voice_name: str, voice_file: str) -> [SubMaker, None
except Exception as e:
logger.error(f"failed, error: {str(e)}")
if i < 2: # 如果不是最后一次重试则等待1秒
time.sleep(1)
time.sleep(3)
return None
@ -1318,96 +1317,6 @@ def create_subtitle_from_multiple(text: str, sub_maker_list: List[SubMaker], lis
traceback.print_exc()
def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str):
    """
    Build an optimized SRT subtitle file from TTS timing data.

    1. Split the script text into lines at punctuation marks.
    2. Match the TTS word fragments against the script lines one by one.
    3. Write a new subtitle file only when every script line was matched.
    """
    text = _format_text(text)

    def formatter(idx: int, start_time: float, end_time: float, sub_text: str) -> str:
        """
        Format one SRT entry, e.g.:

        1
        00:00:00,000 --> 00:00:02,360
        跑步是一项简单易行的运动
        """
        start_t = mktimestamp(start_time).replace(".", ",")
        end_t = mktimestamp(end_time).replace(".", ",")
        return f"{idx}\n" f"{start_t} --> {end_t}\n" f"{sub_text}\n"

    start_time = -1.0  # -1 marks "start of next subtitle not yet seen"
    sub_items = []
    sub_index = 0
    script_lines = utils.split_string_by_punctuations(text)

    def match_line(_sub_line: str, _sub_index: int):
        # Compare the accumulated TTS text with the expected script line,
        # progressively relaxing the comparison (exact, punctuation-stripped,
        # then all non-word characters stripped). Returns the matched line or "".
        if len(script_lines) <= _sub_index:
            return ""
        _line = script_lines[_sub_index]
        if _sub_line == _line:
            return script_lines[_sub_index].strip()
        _sub_line_ = re.sub(r"[^\w\s]", "", _sub_line)
        _line_ = re.sub(r"[^\w\s]", "", _line)
        if _sub_line_ == _line_:
            return _line_.strip()
        _sub_line_ = re.sub(r"\W+", "", _sub_line)
        _line_ = re.sub(r"\W+", "", _line)
        if _sub_line_ == _line_:
            return _line.strip()
        return ""

    sub_line = ""
    try:
        # Accumulate TTS fragments until they match the next script line,
        # then emit one SRT entry spanning the accumulated time range.
        for _, (offset, sub) in enumerate(zip(sub_maker.offset, sub_maker.subs)):
            _start_time, end_time = offset
            if start_time < 0:
                start_time = _start_time
            sub = unescape(sub)
            sub_line += sub
            sub_text = match_line(sub_line, sub_index)
            if sub_text:
                sub_index += 1
                line = formatter(
                    idx=sub_index,
                    start_time=start_time,
                    end_time=end_time,
                    sub_text=sub_text,
                )
                sub_items.append(line)
                start_time = -1.0
                sub_line = ""
        # Only write the file when every script line found a match;
        # otherwise the timing would be unreliable.
        if len(sub_items) == len(script_lines):
            with open(subtitle_file, "w", encoding="utf-8") as file:
                file.write("\n".join(sub_items) + "\n")
            try:
                # Sanity-check the written file by parsing it back.
                sbs = subtitles.file_to_subtitles(subtitle_file, encoding="utf-8")
                duration = max([tb for ((ta, tb), txt) in sbs])
                logger.info(
                    f"completed, subtitle file created: {subtitle_file}, duration: {duration}"
                )
            except Exception as e:
                logger.error(f"failed, error: {str(e)}")
                os.remove(subtitle_file)
        else:
            logger.warning(
                f"failed, sub_items len: {len(sub_items)}, script_lines len: {len(script_lines)}"
            )
    except Exception as e:
        logger.error(f"failed, error: {str(e)}")
def get_audio_duration(sub_maker: submaker.SubMaker):
"""
获取音频时长
@ -1466,20 +1375,3 @@ def tts_multiple(task_id: str, list_script: list, voice_name: str, voice_rate: f
logger.info(f"已生成音频文件: {audio_file}")
return audio_files, sub_maker_list
if __name__ == "__main__":
    # Manual smoke test: synthesize audio for each script segment and build a
    # merged subtitle file. Paths and the task id are hard-coded for local runs.
    voice_name = "zh-CN-YunyangNeural"
    # voice_name = "af-ZA-AdriNeural"
    voice_name = parse_voice_name(voice_name)
    print(voice_name)

    # Load the clip script: a list of segments carrying at least the
    # 'narration' and 'OST' fields used below.
    with open("../../resource/scripts/2024-1203-205442.json", 'r', encoding='utf-8') as f:
        data = json.load(f)

    audio_files, sub_maker_list = tts_multiple(task_id="12312312", list_script=data, voice_name=voice_name, voice_rate=1, voice_pitch=1)
    # Only narrated segments (falsy 'OST') contribute subtitle text.
    full_text = " ".join([item['narration'] for item in data if not item['OST']])
    subtitle_file = os.path.join(utils.task_dir("12312312"), "subtitle_multiple.srt")
    create_subtitle_from_multiple(full_text, sub_maker_list, data, subtitle_file)
    print(f"生成的音频文件列表: {audio_files}")

View File

@ -117,6 +117,47 @@ def song_dir(sub_dir: str = ""):
return d
def get_bgm_file(bgm_type: str = "random", bgm_file: str = ""):
    """Resolve which background-music file to use.

    Args:
        bgm_type: "random" picks a track from the song directory; an empty
            string disables background music entirely.
        bgm_file: explicit path to a music file; used as-is when it exists.

    Returns:
        str: path of the chosen music file, or "" when none applies.
    """
    import glob
    import random

    # No background music requested at all.
    if not bgm_type:
        return ""

    # An explicit, existing file always wins over random selection.
    if bgm_file and os.path.exists(bgm_file):
        return bgm_file

    # Any type other than "random" (with no usable file) yields no music.
    if bgm_type != "random":
        return ""

    song_dir_path = song_dir()
    if not os.path.exists(song_dir_path):
        logger.warning(f"背景音乐目录不存在: {song_dir_path}")
        return ""

    # Collect both supported formats (mp3 + flac) from the directory.
    candidates = []
    for pattern in ("*.mp3", "*.flac"):
        candidates.extend(glob.glob(os.path.join(song_dir_path, pattern)))

    if not candidates:
        logger.warning(f"在目录 {song_dir_path} 中没有找到 MP3 或 FLAC 文件")
        return ""
    return random.choice(candidates)
def public_dir(sub_dir: str = ""):
d = resource_dir(f"public")
if sub_dir:
@ -339,7 +380,7 @@ def time_to_seconds(time_str: str) -> float:
# 分割时间部分
parts = time_part.split(':')
if len(parts) == 3: # HH:MM:SS
h, m, s = map(float, parts)
seconds = h * 3600 + m * 60 + s
@ -350,7 +391,7 @@ def time_to_seconds(time_str: str) -> float:
seconds = float(parts[0])
return seconds + ms
except (ValueError, IndexError) as e:
logger.error(f"时间格式转换错误 {time_str}: {str(e)}")
return 0.0
@ -373,16 +414,16 @@ def calculate_total_duration(scenes):
float: 总时长
"""
total_seconds = 0
for scene in scenes:
start, end = scene['timestamp'].split('-')
# 使用 time_to_seconds 函数处理更精确的时间格式
start_seconds = time_to_seconds(start)
end_seconds = time_to_seconds(end)
duration = end_seconds - start_seconds
total_seconds += duration
return total_seconds
@ -502,7 +543,7 @@ def clear_keyframes_cache(video_path: str = None):
keyframes_dir = os.path.join(temp_dir(), "keyframes")
if not os.path.exists(keyframes_dir):
return
if video_path:
# 理指定视频的缓存
video_hash = md5(video_path + str(os.path.getmtime(video_path)))
@ -516,7 +557,7 @@ def clear_keyframes_cache(video_path: str = None):
import shutil
shutil.rmtree(keyframes_dir)
logger.info("已清理所有关键帧缓存")
except Exception as e:
logger.error(f"清理关键帧缓存失败: {e}")
@ -527,15 +568,16 @@ def init_resources():
# 创建字体目录
font_dir = os.path.join(root_dir(), "resource", "fonts")
os.makedirs(font_dir, exist_ok=True)
# 检查字体文件
font_files = [
("SourceHanSansCN-Regular.otf", "https://github.com/adobe-fonts/source-han-sans/raw/release/OTF/SimplifiedChinese/SourceHanSansSC-Regular.otf"),
("SourceHanSansCN-Regular.otf",
"https://github.com/adobe-fonts/source-han-sans/raw/release/OTF/SimplifiedChinese/SourceHanSansSC-Regular.otf"),
("simhei.ttf", "C:/Windows/Fonts/simhei.ttf"), # Windows 黑体
("simkai.ttf", "C:/Windows/Fonts/simkai.ttf"), # Windows 楷体
("simsun.ttc", "C:/Windows/Fonts/simsun.ttc"), # Windows 宋体
]
# 优先使用系统字体
system_font_found = False
for font_name, source in font_files:
@ -547,16 +589,17 @@ def init_resources():
logger.info(f"已复制系统字体: {font_name}")
system_font_found = True
break
# 如果没有找到系统字体,则下载思源黑体
if not system_font_found:
source_han_path = os.path.join(font_dir, "SourceHanSansCN-Regular.otf")
if not os.path.exists(source_han_path):
download_font(font_files[0][1], source_han_path)
except Exception as e:
logger.error(f"初始化资源文件失败: {e}")
def download_font(url: str, font_path: str):
"""下载字体文件"""
try:
@ -564,16 +607,17 @@ def download_font(url: str, font_path: str):
import requests
response = requests.get(url)
response.raise_for_status()
with open(font_path, 'wb') as f:
f.write(response.content)
logger.info(f"字体文件下载成功: {font_path}")
except Exception as e:
logger.error(f"下载字体文件失败: {e}")
raise
def init_imagemagick():
"""初始化 ImageMagick 配置"""
try:
@ -583,10 +627,10 @@ def init_imagemagick():
if result.returncode != 0:
logger.error("ImageMagick 未安装或配置不正确")
return False
# 设置 IMAGEMAGICK_BINARY 环境变量
os.environ['IMAGEMAGICK_BINARY'] = 'magick'
return True
except Exception as e:
logger.error(f"初始化 ImageMagick 失败: {str(e)}")

View File

@ -11,8 +11,13 @@
vision_gemini_api_key = ""
vision_gemini_model_name = "gemini-1.5-flash"
########## Vision Qwen API Key
vision_qwenvl_api_key = ""
vision_qwenvl_model_name = "qwen-vl-max-latest"
vision_qwenvl_base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"
########### Vision NarratoAPI Key
narrato_api_key = ""
narrato_api_key = "" # SECURITY: never commit a real API key; set this locally
narrato_api_url = "https://narratoinsight.scsmtech.cn/api/v1"
narrato_vision_model = "gemini-1.5-flash"
narrato_vision_key = ""
@ -32,9 +37,7 @@
########## OpenAI API Key
# Get your API key at https://platform.openai.com/api-keys
text_openai_api_key = ""
# No need to set it unless you want to use your own proxy
text_openai_base_url = ""
# Check your available models at https://platform.openai.com/account/limits
text_openai_base_url = "https://api.openai.com/v1"
text_openai_model_name = "gpt-4o-mini"
########## Moonshot API Key
@ -66,7 +69,8 @@
# https://tongyi.aliyun.com/qianwen/
# https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction
text_qwen_api_key = ""
text_qwen_model_name = "qwen-max"
text_qwen_model_name = "qwen-plus-1127"
text_qwen_base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"
########## DeepSeek API Key
# Visit https://platform.deepseek.com/api_keys to get your API key

View File

@ -1,7 +1,6 @@
requests~=2.31.0
moviepy==2.0.0.dev2
faster-whisper~=1.0.1
edge_tts~=6.1.15
uvicorn~=0.27.1
fastapi~=0.115.4
tomli~=2.0.1
@ -35,3 +34,5 @@ tiktoken==0.8.0
yt-dlp==2024.11.18
pysrt==1.1.2
httpx==0.27.2
transformers==4.47.0
edge-tts==6.1.19

View File

@ -6,23 +6,25 @@ from app.services import voice
from app.utils import utils
from webui.utils.cache import get_songs_cache
def render_audio_panel(tr):
    """Render the audio settings panel.

    Args:
        tr: translation function mapping UI label keys to localized text.
    """
    with st.container(border=True):
        st.write(tr("Audio Settings"))

        # TTS (text-to-speech) settings.
        render_tts_settings(tr)

        # Background music settings.
        render_bgm_settings(tr)
def render_tts_settings(tr):
"""渲染TTS(文本转语音)设置"""
# 获取支持的语音列表
support_locales = ["zh-CN"]
voices = voice.get_all_azure_voices(filter_locals=support_locales)
# 创建友好的显示名称
friendly_names = {
v: v.replace("Female", tr("Female"))
@ -30,11 +32,11 @@ def render_tts_settings(tr):
.replace("Neural", "")
for v in voices
}
# 获取保存的语音设置
saved_voice_name = config.ui.get("voice_name", "")
saved_voice_name_index = 0
if saved_voice_name in friendly_names:
saved_voice_name_index = list(friendly_names.keys()).index(saved_voice_name)
else:
@ -56,7 +58,7 @@ def render_tts_settings(tr):
voice_name = list(friendly_names.keys())[
list(friendly_names.values()).index(selected_friendly_name)
]
# 保存设置
config.ui["voice_name"] = voice_name
@ -70,34 +72,40 @@ def render_tts_settings(tr):
# 试听按钮
render_voice_preview(tr, voice_name)
def render_azure_v2_settings(tr):
"""渲染Azure V2语音设置"""
saved_azure_speech_region = config.azure.get("speech_region", "")
saved_azure_speech_key = config.azure.get("speech_key", "")
azure_speech_region = st.text_input(
tr("Speech Region"),
tr("Speech Region"),
value=saved_azure_speech_region
)
azure_speech_key = st.text_input(
tr("Speech Key"),
value=saved_azure_speech_key,
tr("Speech Key"),
value=saved_azure_speech_key,
type="password"
)
config.azure["speech_region"] = azure_speech_region
config.azure["speech_key"] = azure_speech_key
def render_voice_parameters(tr):
"""渲染语音参数设置"""
# 音量
voice_volume = st.selectbox(
voice_volume = st.slider(
tr("Speech Volume"),
options=[0.6, 0.8, 1.0, 1.2, 1.5, 2.0, 3.0, 4.0, 5.0],
index=2,
min_value=0.0,
max_value=2.0,
value=1.0,
step=0.1,
help=tr("Adjust the volume of the original audio")
)
st.session_state['voice_volume'] = voice_volume
# 语速
voice_rate = st.selectbox(
tr("Speech Rate"),
@ -114,6 +122,7 @@ def render_voice_parameters(tr):
)
st.session_state['voice_pitch'] = voice_pitch
def render_voice_preview(tr, voice_name):
"""渲染语音试听功能"""
if st.button(tr("Play Voice")):
@ -122,11 +131,11 @@ def render_voice_preview(tr, voice_name):
play_content = st.session_state.get('video_script', '')
if not play_content:
play_content = tr("Voice Example")
with st.spinner(tr("Synthesizing Voice")):
temp_dir = utils.storage_dir("temp", create=True)
audio_file = os.path.join(temp_dir, f"tmp-voice-{str(uuid4())}.mp3")
sub_maker = voice.tts(
text=play_content,
voice_name=voice_name,
@ -134,7 +143,7 @@ def render_voice_preview(tr, voice_name):
voice_pitch=st.session_state.get('voice_pitch', 1.0),
voice_file=audio_file,
)
# 如果语音文件生成失败,使用默认内容重试
if not sub_maker:
play_content = "This is a example voice. if you hear this, the voice synthesis failed with the original content."
@ -151,6 +160,7 @@ def render_voice_preview(tr, voice_name):
if os.path.exists(audio_file):
os.remove(audio_file)
def render_bgm_settings(tr):
"""渲染背景音乐设置"""
# 背景音乐选项
@ -159,14 +169,14 @@ def render_bgm_settings(tr):
(tr("Random Background Music"), "random"),
(tr("Custom Background Music"), "custom"),
]
selected_index = st.selectbox(
tr("Background Music"),
index=1,
options=range(len(bgm_options)),
format_func=lambda x: bgm_options[x][0],
)
# 获取选择的背景音乐类型
bgm_type = bgm_options[selected_index][1]
st.session_state['bgm_type'] = bgm_type
@ -176,15 +186,19 @@ def render_bgm_settings(tr):
custom_bgm_file = st.text_input(tr("Custom Background Music File"))
if custom_bgm_file and os.path.exists(custom_bgm_file):
st.session_state['bgm_file'] = custom_bgm_file
# 背景音乐音量
bgm_volume = st.selectbox(
bgm_volume = st.slider(
tr("Background Music Volume"),
options=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
index=2,
min_value=0.0,
max_value=2.0,
value=1.0,
step=0.1,
help=tr("Adjust the volume of the original audio")
)
st.session_state['bgm_volume'] = bgm_volume
def get_audio_params():
"""获取音频参数"""
return {
@ -195,4 +209,4 @@ def get_audio_params():
'bgm_type': st.session_state.get('bgm_type', 'random'),
'bgm_file': st.session_state.get('bgm_file', ''),
'bgm_volume': st.session_state.get('bgm_volume', 0.2),
}
}

View File

@ -149,6 +149,7 @@ def test_vision_model_connection(api_key, base_url, model_name, provider, tr):
else:
return False, f"{tr('Unsupported provider')}: {provider}"
def render_vision_llm_settings(tr):
"""渲染视频分析模型设置"""
st.subheader(tr("Vision Model Settings"))
@ -196,7 +197,7 @@ def render_vision_llm_settings(tr):
elif vision_provider == 'qwenvl':
st_vision_base_url = st.text_input(
tr("Vision Base URL"),
value=vision_base_url or "https://dashscope.aliyuncs.com/compatible-mode/v1",
value=vision_base_url,
help=tr("Default: https://dashscope.aliyuncs.com/compatible-mode/v1")
)
st_vision_model_name = st.text_input(

View File

@ -2,12 +2,15 @@ import os
import glob
import json
import time
import traceback
import streamlit as st
from loguru import logger
from app.config import config
from app.models.schema import VideoClipParams
from app.utils import utils, check_script
from webui.tools.generate_script_docu import generate_script_docu
from webui.tools.generate_script_short import generate_script_short
def render_script_panel(tr):
@ -34,6 +37,7 @@ def render_script_file(tr, params):
script_list = [
(tr("None"), ""),
(tr("Auto Generate"), "auto"),
(tr("Short Generate"), "short"),
(tr("Upload Script"), "upload_script") # 新增上传脚本选项
]
@ -216,7 +220,9 @@ def render_script_buttons(tr, params):
script_path = st.session_state.get('video_clip_json_path', '')
if script_path == "auto":
button_name = tr("Generate Video Script")
elif script_path:
elif script_path == "short":
button_name = tr("Generate Short Video Script")
elif script_path.endswith("json"):
button_name = tr("Load Video Script")
else:
button_name = tr("Please Select Script File")
@ -224,6 +230,8 @@ def render_script_buttons(tr, params):
if st.button(button_name, key="script_action", disabled=not script_path):
if script_path == "auto":
generate_script_docu(tr, params)
elif script_path == "short":
generate_script_short(tr, params)
else:
load_script(tr, script_path)
@ -275,6 +283,7 @@ def load_script(tr, script_path):
st.success(tr("Script loaded successfully"))
st.rerun()
except Exception as e:
logger.error(f"加载脚本文件时发生错误\n{traceback.format_exc()}")
st.error(f"{tr('Failed to load script')}: {str(e)}")
@ -332,3 +341,14 @@ def crop_video(tr, params):
time.sleep(2)
progress_bar.empty()
status_text.empty()
def get_script_params():
    """Collect the script-related settings from the Streamlit session state."""
    keys = (
        'video_language',
        'video_clip_json_path',
        'video_origin_path',
        'video_name',
        'video_plot',
    )
    # Every setting falls back to an empty string when the UI has not set it.
    return {key: st.session_state.get(key, '') for key in keys}

View File

@ -1,6 +1,7 @@
import streamlit as st
from app.models.schema import VideoClipParams, VideoAspect
def render_video_panel(tr):
"""渲染视频配置面板"""
with st.container(border=True):
@ -8,6 +9,7 @@ def render_video_panel(tr):
params = VideoClipParams()
render_video_config(tr, params)
def render_video_config(tr, params):
"""渲染视频配置"""
# 视频比例
@ -39,9 +41,20 @@ def render_video_config(tr, params):
)
st.session_state['video_quality'] = video_qualities[quality_index][1]
# 原声音量
params.original_volume = st.slider(
tr("Original Volume"),
min_value=0.0,
max_value=2.0,
value=1.0,
step=0.1,
help=tr("Adjust the volume of the original audio")
)
def get_video_params():
    """Collect the video-related settings from the Streamlit session state."""
    aspect = st.session_state.get('video_aspect', VideoAspect.portrait.value)
    quality = st.session_state.get('video_quality', '1080p')
    return {
        'video_aspect': aspect,
        'video_quality': quality,
    }

View File

@ -2,13 +2,12 @@
"Language": "简体中文",
"Translation": {
"Video Script Configuration": "**视频脚本配置**",
"Generate Video Script": "生成视频脚本",
"Generate Video Script": "AI生成画面解说脚本",
"Video Subject": "视频主题(给定一个关键词,:red[AI自动生成]视频文案)",
"Script Language": "生成视频脚本的语言一般情况AI会自动根据你输入的主题语言输出",
"Script Files": "脚本文件",
"Generate Video Script and Keywords": "点击使用AI根据**主题**生成 【视频文案】 和 【视频关键词】",
"Auto Detect": "自动检测",
"Auto Generate": "自动生成",
"Video Theme": "视频主题",
"Generation Prompt": "自定义提示词",
"Save Script": "保存脚本",
@ -188,6 +187,11 @@
"Transcription Failed": "转录失败",
"Mergeable Files": "可合并文件数",
"Subtitle Content": "字幕内容",
"Merge Result Preview": "合并结果预览"
"Merge Result Preview": "合并结果预览",
"Short Generate": "短剧混剪 (高燃剪辑, 当前只支持 gpt-4o 模型)",
"Generate Short Video Script": "AI生成短剧混剪脚本",
"Adjust the volume of the original audio": "调整原始音频的音量",
"Original Volume": "视频音量",
"Auto Generate": "纪录片解说 (画面解说)"
}
}

View File

@ -1,7 +1,11 @@
import os
import requests
import streamlit as st
from loguru import logger
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from app.config import config
from app.utils import gemini_analyzer, qwenvl_analyzer
@ -31,17 +35,6 @@ def create_vision_analyzer(provider, api_key, model, base_url):
raise ValueError(f"不支持的视觉分析提供商: {provider}")
def get_script_params():
"""获取脚本参数"""
return {
'video_language': st.session_state.get('video_language', ''),
'video_clip_json_path': st.session_state.get('video_clip_json_path', ''),
'video_origin_path': st.session_state.get('video_origin_path', ''),
'video_name': st.session_state.get('video_name', ''),
'video_plot': st.session_state.get('video_plot', '')
}
def get_batch_timestamps(batch_files, prev_batch_files=None):
"""
解析一批文件的时间戳范围,支持毫秒级精度
@ -139,3 +132,32 @@ def get_batch_files(keyframe_files, result, batch_size=5):
batch_start = result['batch_index'] * batch_size
batch_end = min(batch_start + batch_size, len(keyframe_files))
return keyframe_files[batch_start:batch_end]
def chekc_video_config(video_params):
    """
    Report the video analysis configuration to the NarratoAPI service.

    NOTE(review): the name contains a typo ("chekc" -> "check"); it is kept
    unchanged because other modules already import it under this name.

    Args:
        video_params: dict with the vision/text model configuration to report.

    Returns:
        bool: True when the endpoint accepted the payload, False on any
        network error or non-2xx HTTP status.
    """
    headers = {
        'accept': 'application/json',
        'Content-Type': 'application/json'
    }

    # Retry transient server-side failures (5xx) up to 3 times with backoff.
    session = requests.Session()
    retry_strategy = Retry(
        total=3,
        backoff_factor=1,
        status_forcelist=[500, 502, 503, 504]
    )
    adapter = HTTPAdapter(max_retries=retry_strategy)
    session.mount("https://", adapter)

    try:
        response = session.post(
            f"{config.app.get('narrato_api_url')}/video/config",
            headers=headers,
            json=video_params,
            timeout=30,
            verify=True
        )
        # Bug fix: the response status was never inspected, so a 4xx/5xx
        # reply used to be reported as success.
        response.raise_for_status()
        return True
    except Exception as e:
        # Best-effort reporting: never break the caller, but leave a trace.
        logger.warning(f"上报视频分析配置失败: {e}")
        return False

View File

@ -13,7 +13,7 @@ from urllib3.util.retry import Retry
from app.config import config
from app.utils.script_generator import ScriptProcessor
from app.utils import utils, video_processor, video_processor_v2, qwenvl_analyzer
from webui.tools.base import create_vision_analyzer, get_batch_files, get_batch_timestamps
from webui.tools.base import create_vision_analyzer, get_batch_files, get_batch_timestamps, chekc_video_config
def generate_script_docu(tr, params):
@ -117,8 +117,7 @@ def generate_script_docu(tr, params):
elif vision_llm_provider == 'qwenvl':
vision_api_key = st.session_state.get('vision_qwenvl_api_key')
vision_model = st.session_state.get('vision_qwenvl_model_name', 'qwen-vl-max-latest')
vision_base_url = st.session_state.get('vision_qwenvl_base_url',
'https://dashscope.aliyuncs.com/compatible-mode/v1')
vision_base_url = st.session_state.get('vision_qwenvl_base_url')
else:
raise ValueError(f"不支持的视觉分析提供商: {vision_llm_provider}")
@ -228,28 +227,7 @@ def generate_script_docu(tr, params):
"text_model_name": text_model,
"text_base_url": text_base_url or ""
}
headers = {
'accept': 'application/json',
'Content-Type': 'application/json'
}
session = requests.Session()
retry_strategy = Retry(
total=3,
backoff_factor=1,
status_forcelist=[500, 502, 503, 504]
)
adapter = HTTPAdapter(max_retries=retry_strategy)
session.mount("https://", adapter)
try:
response = session.post(
f"{config.app.get('narrato_api_url')}/video/config",
headers=headers,
json=api_params,
timeout=30,
verify=True
)
except Exception as e:
pass
chekc_video_config(api_params)
custom_prompt = st.session_state.get('custom_prompt', '')
processor = ScriptProcessor(
model_name=text_model,

View File

@ -0,0 +1,85 @@
import os
import json
import time
import asyncio
import traceback
import requests
import streamlit as st
from loguru import logger
from app.config import config
from webui.tools.base import chekc_video_config
from app.services.SDP.generate_script_short import generate_script
def generate_script_short(tr, params):
    """
    Generate a short-drama remix ("短剧混剪") video script from the video's
    subtitle (.srt) file.

    Args:
        tr: translation function for UI labels (currently unused here but kept
            for a uniform panel-callback signature).
        params: clip parameters object; only `video_origin_path` is read.

    Side effects: drives Streamlit progress widgets and stores the generated
    script into st.session_state['video_clip_json'].
    """
    progress_bar = st.progress(0)
    status_text = st.empty()

    def update_progress(progress: float, message: str = ""):
        # `progress` is on a 0-100 scale.
        progress_bar.progress(progress)
        if message:
            status_text.text(f"{progress}% - {message}")
        else:
            status_text.text(f"进度: {progress}%")

    try:
        with st.spinner("正在生成脚本..."):
            # Resolve the configured text-LLM provider and its credentials.
            text_provider = config.app.get('text_llm_provider', 'gemini').lower()
            text_api_key = config.app.get(f'text_{text_provider}_api_key')
            text_model = config.app.get(f'text_{text_provider}_model_name')
            text_base_url = config.app.get(f'text_{text_provider}_base_url')
            # NOTE(review): vision settings are looked up under the *text*
            # provider's name — confirm this is intentional.
            vision_api_key = st.session_state.get(f'vision_{text_provider}_api_key', "")
            vision_model = st.session_state.get(f'vision_{text_provider}_model_name', "")
            vision_base_url = st.session_state.get(f'vision_{text_provider}_base_url', "")
            narrato_api_key = config.app.get('narrato_api_key')

            update_progress(20, "开始准备生成脚本")

            # Derive the subtitle path from the video path by naive string
            # substitution; assumes the subtitle was transcribed beforehand.
            srt_path = params.video_origin_path.replace(".mp4", ".srt").replace("videos", "srt").replace("video", "subtitle")
            if not os.path.exists(srt_path):
                logger.error(f"{srt_path} 文件不存在请检查或重新转录")
                st.error(f"{srt_path} 文件不存在请检查或重新转录")
                st.stop()

            api_params = {
                "vision_api_key": vision_api_key,
                "vision_model_name": vision_model,
                "vision_base_url": vision_base_url or "",
                "text_api_key": text_api_key,
                "text_model_name": text_model,
                "text_base_url": text_base_url or ""
            }
            # Best-effort configuration reporting; the result is ignored.
            chekc_video_config(api_params)

            script = generate_script(
                srt_path=srt_path,
                output_path="resource/scripts/merged_subtitle.json",
                api_key=text_api_key,
                model_name=text_model,
                base_url=text_base_url,
                narrato_api_key=narrato_api_key,
                bert_path="app/models/bert/",
            )
            if script is None:
                st.error("生成脚本失败,请检查日志")
                st.stop()
            logger.info(f"脚本生成完成 {json.dumps(script, ensure_ascii=False, indent=4)}")

            # The generator may return either a parsed list or a JSON string.
            if isinstance(script, list):
                st.session_state['video_clip_json'] = script
            elif isinstance(script, str):
                st.session_state['video_clip_json'] = json.loads(script)

            update_progress(80, "脚本生成完成")

            time.sleep(0.1)
            progress_bar.progress(100)
            status_text.text("脚本生成完成!")
            st.success("视频脚本生成成功!")
    except Exception as err:
        progress_bar.progress(100)
        st.error(f"生成过程中发生错误: {str(err)}")
        logger.exception(f"生成脚本时发生错误\n{traceback.format_exc()}")