diff --git a/.gitignore b/.gitignore index d0a8e81..4bea0a5 100644 --- a/.gitignore +++ b/.gitignore @@ -31,4 +31,5 @@ resource/fonts/*.ttc resource/fonts/*.ttf resource/fonts/*.otf resource/srt/*.srt -app/models/faster-whisper-large-v2/* \ No newline at end of file +app/models/faster-whisper-large-v2/* +app/models/bert/* diff --git a/app/models/schema.py b/app/models/schema.py index 6621772..5e2e909 100644 --- a/app/models/schema.py +++ b/app/models/schema.py @@ -345,29 +345,29 @@ class VideoClipParams(BaseModel): # video_concat_mode: Optional[VideoConcatMode] = VideoConcatMode.random.value voice_name: Optional[str] = Field(default="zh-CN-YunjianNeural", description="语音名称") - voice_volume: Optional[float] = Field(default=1.0, description="语音音量") + voice_volume: Optional[float] = Field(default=1.0, description="解说语音音量") voice_rate: Optional[float] = Field(default=1.0, description="语速") voice_pitch: Optional[float] = Field(default=1.0, description="语调") bgm_name: Optional[str] = Field(default="random", description="背景音乐名称") bgm_type: Optional[str] = Field(default="random", description="背景音乐类型") bgm_file: Optional[str] = Field(default="", description="背景音乐文件") - bgm_volume: Optional[float] = Field(default=0.2, description="背景音乐音量") - subtitle_enabled: Optional[bool] = Field(default=True, description="是否启用字幕") - subtitle_position: Optional[str] = Field(default="bottom", description="字幕位置") # top, bottom, center - font_name: Optional[str] = Field(default="STHeitiMedium.ttc", description="字体名称") - text_fore_color: Optional[str] = Field(default="#FFFFFF", description="文字前景色") - text_background_color: Optional[str] = Field(default="transparent", description="文字背景色") + subtitle_enabled: bool = True + font_name: str = "SimHei" # 默认使用黑体 + font_size: int = 36 + text_fore_color: str = "white" # 文本前景色 + text_back_color: Optional[str] = None # 文本背景色 + stroke_color: str = "black" # 描边颜色 + stroke_width: float = 1.5 # 描边宽度 + subtitle_position: str = "bottom" # top, bottom, center, custom - font_size: int = Field(default=60, description="文字大小") - stroke_color: Optional[str] = Field(default="#000000", description="文字描边颜色") - stroke_width: float = Field(default=1.5, description="文字描边宽度") - custom_position: float = Field(default=70.0, description="自定义位置") + n_threads: Optional[int] = Field(default=16, description="线程数") # 线程数,有助于提升视频处理速度 + + tts_volume: Optional[float] = Field(default=1.0, description="解说语音音量(后处理)") + original_volume: Optional[float] = Field(default=1.0, description="视频原声音量") + bgm_volume: Optional[float] = Field(default=0.6, description="背景音乐音量") - n_threads: Optional[int] = 8 # 线程数,有助于提升视频处理速度 - tts_volume: float = 1.0 # TTS音频音量 - video_volume: float = 0.1 # 视频原声音量 class VideoTranscriptionRequest(BaseModel): video_name: str @@ -376,5 +376,13 @@ class VideoTranscriptionRequest(BaseModel): class Config: arbitrary_types_allowed = True + class VideoTranscriptionResponse(BaseModel): transcription: str + + +class SubtitlePosition(str, Enum): + TOP = "top" + CENTER = "center" + BOTTOM = "bottom" + diff --git a/app/services/SDP/generate_script_short.pyd b/app/services/SDP/generate_script_short.pyd new file mode 100644 index 0000000..03be9bc Binary files /dev/null and b/app/services/SDP/generate_script_short.pyd differ diff --git a/app/services/SDP/generate_script_short.so b/app/services/SDP/generate_script_short.so new file mode 100755 index 0000000..054dbcc Binary files /dev/null and b/app/services/SDP/generate_script_short.so differ diff --git a/app/services/SDP/utils/short_schema.pyd
b/app/services/SDP/utils/short_schema.pyd new file mode 100644 index 0000000..b11a996 Binary files /dev/null and b/app/services/SDP/utils/short_schema.pyd differ diff --git a/app/services/SDP/utils/short_schema.so b/app/services/SDP/utils/short_schema.so new file mode 100755 index 0000000..dd7aaca Binary files /dev/null and b/app/services/SDP/utils/short_schema.so differ diff --git a/app/services/SDP/utils/step1_subtitle_analyzer_openai.pyd b/app/services/SDP/utils/step1_subtitle_analyzer_openai.pyd new file mode 100644 index 0000000..35e15f0 Binary files /dev/null and b/app/services/SDP/utils/step1_subtitle_analyzer_openai.pyd differ diff --git a/app/services/SDP/utils/step1_subtitle_analyzer_openai.so b/app/services/SDP/utils/step1_subtitle_analyzer_openai.so new file mode 100755 index 0000000..864e2b5 Binary files /dev/null and b/app/services/SDP/utils/step1_subtitle_analyzer_openai.so differ diff --git a/app/services/SDP/utils/step2_subtitle_analyzer_bert.pyd b/app/services/SDP/utils/step2_subtitle_analyzer_bert.pyd new file mode 100644 index 0000000..537d98a Binary files /dev/null and b/app/services/SDP/utils/step2_subtitle_analyzer_bert.pyd differ diff --git a/app/services/SDP/utils/step2_subtitle_analyzer_bert.so b/app/services/SDP/utils/step2_subtitle_analyzer_bert.so new file mode 100755 index 0000000..37e3e3e Binary files /dev/null and b/app/services/SDP/utils/step2_subtitle_analyzer_bert.so differ diff --git a/app/services/SDP/utils/step3_fragment_check.pyd b/app/services/SDP/utils/step3_fragment_check.pyd new file mode 100644 index 0000000..ddbeb66 Binary files /dev/null and b/app/services/SDP/utils/step3_fragment_check.pyd differ diff --git a/app/services/SDP/utils/step3_fragment_check.so b/app/services/SDP/utils/step3_fragment_check.so new file mode 100755 index 0000000..e69db3e Binary files /dev/null and b/app/services/SDP/utils/step3_fragment_check.so differ diff --git a/app/services/SDP/utils/step4_text_generate.pyd b/app/services/SDP/utils/step4_text_generate.pyd new file mode 100644 index 0000000..87dc49d Binary files /dev/null and b/app/services/SDP/utils/step4_text_generate.pyd differ diff --git a/app/services/SDP/utils/step4_text_generate.so b/app/services/SDP/utils/step4_text_generate.so new file mode 100755 index 0000000..04386e9 Binary files /dev/null and b/app/services/SDP/utils/step4_text_generate.so differ diff --git a/app/services/SDP/utils/step5_merge_script.pyd b/app/services/SDP/utils/step5_merge_script.pyd new file mode 100644 index 0000000..0ee76c6 Binary files /dev/null and b/app/services/SDP/utils/step5_merge_script.pyd differ diff --git a/app/services/SDP/utils/step5_merge_script.so b/app/services/SDP/utils/step5_merge_script.so new file mode 100755 index 0000000..31ff759 Binary files /dev/null and b/app/services/SDP/utils/step5_merge_script.so differ diff --git a/app/services/SDP/utils/utils.pyd b/app/services/SDP/utils/utils.pyd new file mode 100644 index 0000000..1dca465 Binary files /dev/null and b/app/services/SDP/utils/utils.pyd differ diff --git a/app/services/SDP/utils/utils.so b/app/services/SDP/utils/utils.so new file mode 100755 index 0000000..66754dc Binary files /dev/null and b/app/services/SDP/utils/utils.so differ diff --git a/app/services/task.py b/app/services/task.py index 77f2cf5..6704f0d 100644 --- a/app/services/task.py +++ b/app/services/task.py @@ -157,55 +157,6 @@ def get_video_materials(task_id, params, video_terms, audio_duration): return downloaded_videos -def generate_final_videos( - task_id, params, downloaded_videos, 
audio_file, subtitle_path -): - final_video_paths = [] - combined_video_paths = [] - video_concat_mode = ( - params.video_concat_mode if params.video_count == 1 else VideoConcatMode.random - ) - - _progress = 50 - for i in range(params.video_count): - index = i + 1 - combined_video_path = path.join( - utils.task_dir(task_id), f"combined-{index}.mp4" - ) - logger.info(f"\n\n## combining video: {index} => {combined_video_path}") - video.combine_videos( - combined_video_path=combined_video_path, - video_paths=downloaded_videos, - audio_file=audio_file, - video_aspect=params.video_aspect, - video_concat_mode=video_concat_mode, - max_clip_duration=params.video_clip_duration, - threads=params.n_threads, - ) - - _progress += 50 / params.video_count / 2 - sm.state.update_task(task_id, progress=_progress) - - final_video_path = path.join(utils.task_dir(task_id), f"final-{index}.mp4") - - logger.info(f"\n\n## generating video: {index} => {final_video_path}") - video.generate_video( - video_path=combined_video_path, - audio_path=audio_file, - subtitle_path=subtitle_path, - output_file=final_video_path, - params=params, - ) - - _progress += 50 / params.video_count / 2 - sm.state.update_task(task_id, progress=_progress) - - final_video_paths.append(final_video_path) - combined_video_paths.append(combined_video_path) - - return final_video_paths, combined_video_paths - - def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: dict): """后台任务(自动剪辑视频进行剪辑)""" logger.info(f"\n\n## 开始任务: {task_id}") @@ -253,7 +204,12 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di segment for segment in list_script if segment['OST'] in [0, 2] ] - # logger.debug(f"tts_segments: {tts_segments}") + logger.debug(f"需要生成TTS的片段数: {len(tts_segments)}") + + # 初始化音频文件路径 + audio_files = [] + final_audio = "" + if tts_segments: audio_files, sub_maker_list = voice.tts_multiple( task_id=task_id, @@ -267,36 +223,54 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di sm.state.update_task(task_id, state=const.TASK_STATE_FAILED) logger.error("TTS转换音频失败, 可能是网络不可用! 
如果您在中国, 请使用VPN.") return + + if audio_files: + logger.info(f"合并音频文件: {audio_files}") + try: + # 传入OST信息以便正确处理音频 + final_audio = audio_merger.merge_audio_files( + task_id=task_id, + audio_files=audio_files, + total_duration=total_duration, + list_script=list_script # 传入完整脚本以便处理OST + ) + logger.info("音频文件合并成功") + except Exception as e: + logger.error(f"合并音频文件失败: {str(e)}") + final_audio = "" else: - audio_files = [] - - logger.info(f"合并音频文件:\n{audio_files}") - # 传入OST信息以便正确处理音频 - final_audio = audio_merger.merge_audio_files( - task_id=task_id, - audio_files=audio_files, - total_duration=total_duration, - list_script=list_script # 传入完整脚本以便处理OST - ) + # 如果没有需要生成TTS的片段,创建一个空白音频文件 + # 这样可以确保后续的音频处理能正确进行 + logger.info("没有需要生成TTS的片段,将保留原声和背景音乐") + final_audio = path.join(utils.task_dir(task_id), "empty.mp3") + try: + from moviepy.editor import AudioClip + # 创建一个与视频等长的空白音频 + empty_audio = AudioClip(make_frame=lambda t: 0, duration=total_duration) + empty_audio.write_audiofile(final_audio, fps=44100) + logger.info(f"已创建空白音频文件: {final_audio}") + except Exception as e: + logger.error(f"创建空白音频文件失败: {str(e)}") + final_audio = "" sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=30) - # 只为OST=0或2的片段生成字幕 subtitle_path = "" if params.subtitle_enabled: - subtitle_path = path.join(utils.task_dir(task_id), f"subtitle.srt") - subtitle_provider = config.app.get("subtitle_provider", "").strip().lower() - logger.info(f"\n\n## 3. 生成字幕、提供程序是: {subtitle_provider}") - - subtitle.create( - audio_file=final_audio, - subtitle_file=subtitle_path, - ) + if audio_files: + subtitle_path = path.join(utils.task_dir(task_id), f"subtitle.srt") + subtitle_provider = config.app.get("subtitle_provider", "").strip().lower() + logger.info(f"\n\n## 3. 生成字幕、提供程序是: {subtitle_provider}") - subtitle_lines = subtitle.file_to_subtitles(subtitle_path) - if not subtitle_lines: - logger.warning(f"字幕文件无效: {subtitle_path}") - subtitle_path = "" + subtitle.create( + audio_file=final_audio, + subtitle_file=subtitle_path, + ) + + subtitle_lines = subtitle.file_to_subtitles(subtitle_path) + if not subtitle_lines: + logger.warning(f"字幕文件无效: {subtitle_path}") + subtitle_path = "" sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=40) @@ -335,14 +309,44 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di final_video_path = path.join(utils.task_dir(task_id), f"final-{index}.mp4") logger.info(f"\n\n## 6. 
最后合成: {index} => {final_video_path}") - # 传入OST信息以便正确处理音频和视频 - video.generate_video_v2( + + # 获取背景音乐 + bgm_path = None + if params.bgm_type or params.bgm_file: + try: + bgm_path = utils.get_bgm_file(bgm_type=params.bgm_type, bgm_file=params.bgm_file) + if bgm_path: + logger.info(f"使用背景音乐: {bgm_path}") + except Exception as e: + logger.error(f"获取背景音乐失败: {str(e)}") + + # 构建字幕样式配置 + subtitle_style = { + 'fontsize': params.font_size, # 字体大小 + 'color': params.text_fore_color, # 字体颜色 + 'stroke_color': params.stroke_color, # 描边颜色 + 'stroke_width': params.stroke_width, # 描边宽度, 范围0-10 + 'bg_color': params.text_back_color, # 文字背景色(可为 None) + 'position': params.subtitle_position, # 字幕位置: top/center/bottom 或 0-1 浮点数(距顶部比例) + 'method': 'caption' # 渲染方法 + } + + # 构建音量配置 + volume_config = { + 'original': params.original_volume, # 视频原声音量 + 'bgm': params.bgm_volume, # 背景音乐音量 + 'narration': params.tts_volume or params.voice_volume, # 解说(TTS)音量 + } + font_path = utils.font_dir(params.font_name) + video.generate_video_v3( video_path=combined_video_path, - audio_path=final_audio, subtitle_path=subtitle_path, - output_file=final_video_path, - params=params, - list_script=list_script # 传入完整脚本以便处理OST + bgm_path=bgm_path, + narration_path=final_audio, + output_path=final_video_path, + volume_config=volume_config, # 添加音量配置 + subtitle_style=subtitle_style, + font_path=font_path ) _progress += 50 / 2 @@ -361,6 +365,40 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di return kwargs +def validate_params(video_path, audio_path, output_file, params): + """ + 验证输入参数 + Args: + video_path: 视频文件路径 + audio_path: 音频文件路径(可以为空字符串) + output_file: 输出文件路径 + params: 视频参数 + + Raises: + FileNotFoundError: 文件不存在时抛出 + ValueError: 参数无效时抛出 + """ + if not video_path: + raise ValueError("视频路径不能为空") + if not os.path.exists(video_path): + raise FileNotFoundError(f"视频文件不存在: {video_path}") + + # 如果提供了音频路径,则验证文件是否存在 + if audio_path and not os.path.exists(audio_path): + raise FileNotFoundError(f"音频文件不存在: {audio_path}") + + if not output_file: + raise ValueError("输出文件路径不能为空") + + # 确保输出目录存在 + output_dir = os.path.dirname(output_file) + if output_dir and not os.path.exists(output_dir): + os.makedirs(output_dir) + + if not params: + raise ValueError("视频参数不能为空") + + if __name__ == "__main__": # task_id = "test123" # subclip_path_videos = {'00:41-01:58': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-00_41-01_58.mp4', diff --git a/app/services/video.py b/app/services/video.py index eadfce0..f840c66 100644 --- a/app/services/video.py +++ b/app/services/video.py @@ -1,186 +1,23 @@ -import re import os -import glob -import random -from typing import List -from typing import Union import traceback +import pysrt +from typing import Optional +from typing import List from loguru import logger from moviepy.editor import * -from moviepy.video.tools.subtitles import SubtitlesClip from PIL import ImageFont from contextlib import contextmanager - -from app.models import const -from app.models.schema import MaterialInfo, VideoAspect, VideoConcatMode, VideoParams, VideoClipParams -from app.utils import utils +from moviepy.editor import ( VideoFileClip, AudioFileClip, TextClip, CompositeVideoClip, CompositeAudioClip ) -def get_bgm_file(bgm_type: str = "random", bgm_file: str = ""): - """ - 获取背景音乐文件路径 - Args: - bgm_type: 背景音乐类型,可选值: random(随机), ""(无背景音乐) - bgm_file: 指定的背景音乐文件路径 - - Returns: - str: 背景音乐文件路径 - """ - if not bgm_type: - return "" - - if bgm_file and os.path.exists(bgm_file): - return bgm_file - - if bgm_type == "random": - song_dir = 
utils.song_dir() - - # 检查目录是否存在 - if not os.path.exists(song_dir): - logger.warning(f"背景音乐目录不存在: {song_dir}") - return "" - - # 支持 mp3 和 flac 格式 - mp3_files = glob.glob(os.path.join(song_dir, "*.mp3")) - flac_files = glob.glob(os.path.join(song_dir, "*.flac")) - files = mp3_files + flac_files - - # 检查是否找到音乐文件 - if not files: - logger.warning(f"在目录 {song_dir} 中没有找到 MP3 或 FLAC 文件") - return "" - - return random.choice(files) - - return "" - - -def combine_videos( - combined_video_path: str, - video_paths: List[str], - audio_file: str, - video_aspect: VideoAspect = VideoAspect.portrait, - video_concat_mode: VideoConcatMode = VideoConcatMode.random, - max_clip_duration: int = 5, - threads: int = 2, -) -> str: - """ - 合并多个视频片段 - Args: - combined_video_path: 合并后的视频保存路径 - video_paths: 待合并的视频路径列表 - audio_file: 音频文件路径 - video_aspect: 视频宽高比 - video_concat_mode: 视频拼接模式(随机/顺序) - max_clip_duration: 每个片段的最大时长(秒) - threads: 处理线程数 - - Returns: - str: 合并后的视频路径 - """ - audio_clip = AudioFileClip(audio_file) - audio_duration = audio_clip.duration - logger.info(f"音频时长: {audio_duration} 秒") - # 每个片段的所需时长 - req_dur = audio_duration / len(video_paths) - req_dur = max_clip_duration - logger.info(f"每个片段最大时长: {req_dur} 秒") - output_dir = os.path.dirname(combined_video_path) - - aspect = VideoAspect(video_aspect) - video_width, video_height = aspect.to_resolution() - - clips = [] - video_duration = 0 - - raw_clips = [] - for video_path in video_paths: - clip = VideoFileClip(video_path).without_audio() - clip_duration = clip.duration - start_time = 0 - - while start_time < clip_duration: - end_time = min(start_time + max_clip_duration, clip_duration) - split_clip = clip.subclip(start_time, end_time) - raw_clips.append(split_clip) - # logger.info(f"从 {start_time:.2f} 到 {end_time:.2f}, 片段时长 {clip_duration:.2f}, 分割片段时长 {split_clip.duration:.2f}") - start_time = end_time - if video_concat_mode.value == VideoConcatMode.sequential.value: - break - - # 随机视频片段顺序 - if video_concat_mode.value == VideoConcatMode.random.value: - random.shuffle(raw_clips) - - # 添加下载的片段,直到音频时长(max_duration)达到 - while video_duration < audio_duration: - for clip in raw_clips: - # 检查片段是否比剩余音频时长长 - if (audio_duration - video_duration) < clip.duration: - clip = clip.subclip(0, (audio_duration - video_duration)) - # 仅当计算的片段时长(req_dur)小于实际片段时长时,缩短片段 - elif req_dur < clip.duration: - clip = clip.subclip(0, req_dur) - clip = clip.set_fps(30) - - # Not all videos are same size, so we need to resize them - clip_w, clip_h = clip.size - if clip_w != video_width or clip_h != video_height: - clip_ratio = clip.w / clip.h - video_ratio = video_width / video_height - - if clip_ratio == video_ratio: - # 等比例缩放 - clip = clip.resize((video_width, video_height)) - else: - # 等比缩放视频 - if clip_ratio > video_ratio: - # 按照目标宽度等比缩放 - scale_factor = video_width / clip_w - else: - # 按照目标高度等比缩放 - scale_factor = video_height / clip_h - - new_width = int(clip_w * scale_factor) - new_height = int(clip_h * scale_factor) - clip_resized = clip.resize(newsize=(new_width, new_height)) - - background = ColorClip( - size=(video_width, video_height), color=(0, 0, 0) - ) - clip = CompositeVideoClip( - [ - background.set_duration(clip.duration), - clip_resized.set_position("center"), - ] - ) - - logger.info( - f"调整视频尺寸为 {video_width} x {video_height}, 片段尺寸: {clip_w} x {clip_h}" - ) - - if clip.duration > max_clip_duration: - clip = clip.subclip(0, max_clip_duration) - - clips.append(clip) - video_duration += clip.duration - - video_clip = concatenate_videoclips(clips) - video_clip = 
video_clip.set_fps(30) - logger.info("writing") - - video_clip.write_videofile( - filename=combined_video_path, - threads=threads, - logger=None, - temp_audiofile_path=output_dir, - audio_codec="aac", - fps=30, - ) - video_clip.close() - logger.success("completed") - return combined_video_path +from app.models.schema import VideoAspect, SubtitlePosition def wrap_text(text, max_width, font, fontsize=60): @@ -269,259 +105,6 @@ def manage_clip(clip): del clip -def generate_video_v2( - video_path: str, - audio_path: str, - subtitle_path: str, - output_file: str, - list_script: list, - params: Union[VideoParams, VideoClipParams], - progress_callback=None, -): - """ - 合并所有素材 - Args: - video_path: 视频路径 - audio_path: 单个音频文件路径 - subtitle_path: 字幕文件路径 - output_file: 输出文件路径 - params: 视频参数 - progress_callback: 进度回调函数,接收 0-100 的进度值 - - Returns: - - """ - total_steps = 4 - current_step = 0 - - def update_progress(step_name): - nonlocal current_step - current_step += 1 - if progress_callback: - progress_callback(int(current_step * 100 / total_steps)) - logger.info(f"完成步骤: {step_name}") - - try: - validate_params(video_path, audio_path, output_file, params) - - with manage_clip(VideoFileClip(video_path)) as video_clip: - aspect = VideoAspect(params.video_aspect) - video_width, video_height = aspect.to_resolution() - - logger.info(f"开始,视频尺寸: {video_width} x {video_height}") - logger.info(f" ① 视频: {video_path}") - logger.info(f" ② 音频: {audio_path}") - logger.info(f" ③ 字幕: {subtitle_path}") - logger.info(f" ④ 输出: {output_file}") - - output_dir = os.path.dirname(output_file) - update_progress("初始化完成") - - # 字体设置 - font_path = "" - if params.subtitle_enabled: - if not params.font_name: - params.font_name = "STHeitiMedium.ttc" - font_path = os.path.join(utils.font_dir(), params.font_name) - if os.name == "nt": - font_path = font_path.replace("\\", "/") - logger.info(f"使用字体: {font_path}") - - def create_text_clip(subtitle_item): - phrase = subtitle_item[1] - max_width = video_width * 0.9 - wrapped_txt, txt_height = wrap_text( - phrase, max_width=max_width, font=font_path, fontsize=params.font_size - ) - _clip = TextClip( - wrapped_txt, - font=font_path, - fontsize=params.font_size, - color=params.text_fore_color, - bg_color=params.text_background_color, - stroke_color=params.stroke_color, - stroke_width=params.stroke_width, - print_cmd=False, - ) - duration = subtitle_item[0][1] - subtitle_item[0][0] - _clip = _clip.set_start(subtitle_item[0][0]) - _clip = _clip.set_end(subtitle_item[0][1]) - _clip = _clip.set_duration(duration) - - if params.subtitle_position == "bottom": - _clip = _clip.set_position(("center", video_height * 0.95 - _clip.h)) - elif params.subtitle_position == "top": - _clip = _clip.set_position(("center", video_height * 0.05)) - elif params.subtitle_position == "custom": - margin = 10 - max_y = video_height - _clip.h - margin - min_y = margin - custom_y = (video_height - _clip.h) * (params.custom_position / 100) - custom_y = max(min_y, min(custom_y, max_y)) - _clip = _clip.set_position(("center", custom_y)) - else: # center - _clip = _clip.set_position(("center", "center")) - return _clip - - update_progress("字体设置完成") - - # 处理音频 - original_audio = video_clip.audio - video_duration = video_clip.duration - new_audio = AudioFileClip(audio_path) - final_audio = process_audio_tracks(original_audio, new_audio, params, video_duration) - update_progress("音频处理完成") - - # 处理字幕 - if subtitle_path and os.path.exists(subtitle_path): - video_clip = process_subtitles(subtitle_path, video_clip, video_duration, 
create_text_clip) - update_progress("字幕处理完成") - - # 合并音频和导出 - logger.info("开始导出视频 (此步骤耗时较长请耐心等待)") - video_clip = video_clip.set_audio(final_audio) - video_clip.write_videofile( - output_file, - audio_codec="aac", - temp_audiofile=os.path.join(output_dir, "temp-audio.m4a"), - threads=params.n_threads, - logger=None, - fps=30, - ) - - except FileNotFoundError as e: - logger.error(f"文件不存在: {str(e)}") - raise - except Exception as e: - logger.error(f"视频生成失败: {str(e)}") - raise - finally: - logger.success("完成") - - -def process_audio_tracks(original_audio, new_audio, params, video_duration): - """ - 处理所有音轨(原声、配音、背景音乐) - Args: - original_audio: 原始音频 - new_audio: 新音频 - params: 视频参数 - video_duration: 视频时长 - - Returns: - CompositeAudioClip: 合成后的音频 - """ - audio_tracks = [] - - if original_audio is not None: - audio_tracks.append(original_audio) - - new_audio = new_audio.volumex(params.voice_volume) - audio_tracks.append(new_audio) - - # 处理背景音乐 - bgm_file = get_bgm_file(bgm_type=params.bgm_type, bgm_file=params.bgm_file) - if bgm_file: - try: - bgm_clip = AudioFileClip(bgm_file).volumex(params.bgm_volume).audio_fadeout(3) - bgm_clip = afx.audio_loop(bgm_clip, duration=video_duration) - audio_tracks.append(bgm_clip) - except Exception as e: - logger.error(f"添加背景音乐失败: {str(e)}") - - return CompositeAudioClip(audio_tracks) if audio_tracks else new_audio - - -def process_subtitles(subtitle_path, video_clip, video_duration, create_text_clip): - """ - 处理字幕 - Args: - subtitle_path: 字幕文件路径 - video_clip: 视频片段 - video_duration: 视频时长 - create_text_clip: 创建文本片段的回调函数 - - Returns: - CompositeVideoClip: 添加字幕后的视频 - """ - if not (subtitle_path and os.path.exists(subtitle_path)): - return video_clip - - sub = SubtitlesClip(subtitles=subtitle_path, encoding="utf-8") - text_clips = [] - - for item in sub.subtitles: - clip = create_text_clip(subtitle_item=item) - - # 时间范围调整 - start_time = max(clip.start, 0) - if start_time >= video_duration: - continue - - end_time = min(clip.end, video_duration) - clip = clip.set_start(start_time).set_end(end_time) - text_clips.append(clip) - - logger.info(f"处理了 {len(text_clips)} 段字幕") - return CompositeVideoClip([video_clip, *text_clips]) - - -def preprocess_video(materials: List[MaterialInfo], clip_duration=4): - """ - 预处理视频素材 - Args: - materials: 素材信息列表 - clip_duration: 片段时长(秒) - - Returns: - List[MaterialInfo]: 处理后的素材信息列表 - """ - for material in materials: - if not material.url: - continue - - ext = utils.parse_extension(material.url) - try: - clip = VideoFileClip(material.url) - except Exception: - clip = ImageClip(material.url) - - width = clip.size[0] - height = clip.size[1] - if width < 480 or height < 480: - logger.warning(f"video is too small, width: {width}, height: {height}") - continue - - if ext in const.FILE_TYPE_IMAGES: - logger.info(f"processing image: {material.url}") - # 创建一个图片剪辑,并设置持续时间为3秒钟 - clip = ( - ImageClip(material.url) - .set_duration(clip_duration) - .set_position("center") - ) - # 使用resize方法来添加缩放效果。这里使用了lambda函数来使得缩放效果随时间变化。 - # 假设我们想要从原始大小逐渐放大到120%的大小。 - # t代表当前时间,clip.duration为视频总时长,这里是3秒。 - # 注意:1 表示100%的大小所以1.2表示120%的大小 - zoom_clip = clip.resize( - lambda t: 1 + (clip_duration * 0.03) * (t / clip.duration) - ) - - # 如果需要,可以创建一个包含缩放剪辑的复合频剪辑 - # (这在您想要在视频中添加其他元素时非常有用) - final_clip = CompositeVideoClip([zoom_clip]) - - # 输出视频 - video_file = f"{material.url}.mp4" - final_clip.write_videofile(video_file, fps=30, logger=None) - final_clip.close() - del final_clip - material.url = video_file - logger.success(f"completed: {video_file}") - return materials - - 
def combine_clip_videos(combined_video_path: str, video_paths: List[str], video_ost_list: List[int], @@ -588,7 +171,6 @@ def combine_clip_videos(combined_video_path: str, video_clip.write_videofile( filename=combined_video_path, threads=threads, - logger=None, audio_codec="aac", fps=30, temp_audiofile=os.path.join(output_dir, "temp-audio.m4a") @@ -640,101 +222,225 @@ def resize_video_with_padding(clip, target_width: int, target_height: int): ]) -def validate_params(video_path, audio_path, output_file, params): +def loop_audio_clip(audio_clip: AudioFileClip, target_duration: float) -> AudioFileClip: """ - 验证输入参数 - Args: - video_path: 视频文件路径 - audio_path: 音频文件路径 - output_file: 输出文件路径 - params: 视频参数 + 循环音频片段直到达到目标时长 - Raises: - FileNotFoundError: 文件不存在时抛出 - ValueError: 参数无效时抛出 + 参数: + audio_clip: 原始音频片段 + target_duration: 目标时长(秒) + 返回: + 循环后的音频片段 """ + # 计算需要循环的次数 + loops_needed = int(target_duration / audio_clip.duration) + 1 + + # 创建足够长的音频 + extended_audio = audio_clip + for _ in range(loops_needed - 1): + extended_audio = CompositeAudioClip([ + extended_audio, + audio_clip.set_start(extended_audio.duration) + ]) + + # 裁剪到目标时长 + return extended_audio.subclip(0, target_duration) + + +def calculate_subtitle_position(position, video_height: int, text_height: int = 0) -> tuple: + """ + 计算字幕在视频中的具体位置 + + Args: + position: 位置配置,可以是 SubtitlePosition 枚举值或表示距顶部百分比的浮点数 + video_height: 视频高度 + text_height: 字幕文本高度 + + Returns: + tuple: (x, y) 坐标 + """ + margin = 50 # 字幕距离边缘的边距 + + if isinstance(position, (int, float)): + # 百分比位置 + return ('center', int(video_height * position)) + + # 预设位置 + if position == SubtitlePosition.TOP: + return ('center', margin) + elif position == SubtitlePosition.CENTER: + return ('center', video_height // 2) + elif position == SubtitlePosition.BOTTOM: + return ('center', video_height - margin - text_height) + + # 默认底部 + return ('center', video_height - margin - text_height) + + +def generate_video_v3( + video_path: str, + subtitle_style: dict, + volume_config: dict, + subtitle_path: Optional[str] = None, + bgm_path: Optional[str] = None, + narration_path: Optional[str] = None, + output_path: str = "output.mp4", + font_path: Optional[str] = None +) -> None: + """ + 合并视频素材,包括视频、字幕、BGM和解说音频 + + 参数: + video_path: 原视频文件路径 + subtitle_path: SRT字幕文件路径(可选) + bgm_path: 背景音乐文件路径(可选) + narration_path: 解说音频文件路径(可选) + output_path: 输出文件路径 + volume_config: 音量配置字典,需包含以下键: + - original: 原声音量(0-1) + - bgm: BGM音量(0-1) + - narration: 解说音量(0-1) + subtitle_style: 字幕样式配置字典,可包含以下键: + - fontsize: 字体大小 + - color: 字体颜色 + - stroke_color: 描边颜色 + - stroke_width: 描边宽度 + - bg_color: 背景色 + - position: 位置,支持 SubtitlePosition 枚举值或 0-1 之间的浮点数(表示距顶部的百分比) + - method: 文字渲染方法 + font_path: 字体文件路径(.ttf/.otf 等格式) + """ + # 检查视频文件是否存在 if not os.path.exists(video_path): raise FileNotFoundError(f"视频文件不存在: {video_path}") - if not os.path.exists(audio_path): - raise FileNotFoundError(f"音频文件不存在: {audio_path}") + # 加载视频 + video = VideoFileClip(video_path) + subtitle_clips = [] - output_dir = os.path.dirname(output_file) - if not os.path.exists(output_dir): - raise FileNotFoundError(f"输出目录不存在: {output_dir}") + # 处理字幕(如果提供) + if subtitle_path: + if os.path.exists(subtitle_path): + # 检查字体文件 + if font_path and not os.path.exists(font_path): + logger.warning(f"警告:字体文件不存在: {font_path}") - if not hasattr(params, 'video_aspect'): - raise ValueError("params 缺少必要参数 video_aspect") + try: + subs = pysrt.open(subtitle_path) + logger.info(f"读取到 {len(subs)} 条字幕") + for index, sub in enumerate(subs):
start_time = sub.start.ordinal / 1000 + end_time = sub.end.ordinal / 1000 -if __name__ == "__main__": - combined_video_path = "../../storage/tasks/123/combined.mp4" + try: + # 检查字幕文本是否为空 + if not sub.text or sub.text.strip() == '': + logger.info(f"警告:第 {index + 1} 条字幕内容为空,已跳过") + continue - video_paths = ['../../storage/temp/clip_video/0b545e689a182a91af2163c7c0ca7ca3/vid-00-00-10_000-00-00-43_039.mp4', - '../../storage/temp/clip_video/0b545e689a182a91af2163c7c0ca7ca3/vid-00-00-45_439-00-01-01_600.mp4', - '../../storage/temp/clip_video/0b545e689a182a91af2163c7c0ca7ca3/vid-00-01-07_920-00-01-25_719.mp4', - '../../storage/temp/clip_video/0b545e689a182a91af2163c7c0ca7ca3/vid-00-01-36_959-00-01-53_719.mp4'] - video_ost_list = [2, 2, 2, 2] - list_script = [ - { - "timestamp": "00:10-00:43", - "picture": "好的,以下是视频画面的客观描述:\n\n视频显示一个男人在一个树木繁茂的地区,靠近一个泥土斜坡他穿着一件深色T恤、卡其色长裤和登山靴。他背着一个军绿色背包,里面似乎装有头和其他工具。\n\n第一个镜头显示该男子从远处走近斜坡,背对着镜头。下一个镜头特写显示了的背包,一个镐头从背包中伸出来。下一个镜头显示该男子用镐头敲打斜坡。下一个镜头是该男子脚上的特写镜头,他穿着登山靴,正站在泥土斜坡上。最后一个镜显示该男子在斜坡上,仔细地拨开树根和泥土。周围的环境是树木繁茂的,阳光透过树叶照射下来。土壤是浅棕色的,斜坡上有许多树根和植被。", - "narration": "(接上文)好吧,今天我们的男主角,背着一个看似随时要发射军绿色背包,竟然化身“泥土探险家”,在斜坡上挥舞着镐头!他这是准备挖宝还是给树根做个“美容”?阳光洒下来,简直是自然界的聚光灯,仿佛在说:“快来看看,这位勇士要挑战泥土极限!”我只能默默想,如果树根能说话,它们一定会喊:“别打我,我还有家人!”这就是生活,总有些搞笑的瞬间等着我们去发现!", - "OST": 2, - "new_timestamp": "00:00:00,000-00:00:33,000" - }, - { - "timestamp": "00:45-01:01", - "picture": "好的以下是视频画面的客观描述:\n\n视频显示了一个人在森林里挖掘。\n\n第一个镜头是地面特写,显示出松��的泥土、碎石和落叶。光线照在部分区域。\n\n第二个镜头中,一模糊不清的蹲一个树根旁挖掘,一个橄榄绿色的背包放在地上。树根缠绕着常春藤。\n\n第三个镜头显示该人在一个更开阔的区域挖掘,那里有一些树根,以及部分倒的树干。他起来像是在挖掘一个较大的坑。\n\n第四个镜头是特写镜头,显示该人用工具清理土坑的墙壁。\n\n第五个镜头是土坑内部的特写镜头,可以看到土质的纹理,有一些小树根和它植被的残留物。", - "narration": "现在,这位勇敢的挖掘者就像个“现代版的土豆农夫”,在林里开辟新天地。的目标是什么?挖一个宝藏还块“树根披萨”?小心哦,别让树根追着你喊:“不要挖我,我也是有故事的!”", - "OST": 2, - "new_timestamp": "00:00:33,000-00:00:49,000" - }, - { - "timestamp": "01:07-01:25", - "picture": "好,以下是视频画面的客观描述:\n\n画面1:特写镜头,显示出一丛带有水珠的深绿色灌木叶片。叶片呈椭圆形,边缘光滑。背景是树根和泥土。\n\n画面2:一个留着胡子的男人正在一个森林中土坑里挖掘。他穿着黑色T恤和卡其色裤子,跪在地,用具挖掘泥土。周围环绕着树木、树根和灌木。一个倒下的树干横跨土坑上方。\n\n画面3:同一个男人坐在他刚才挖的坑的边缘,看着前方。他的表情似乎略带沉思。背景与画面2相同。\n\n画面4:一个广角镜头显示出他挖出的坑。这是一个不规则形状的土坑,在树木繁茂的斜坡上。土壤呈深棕色,可见树根。\n\n画面5:同一个男人跪在地上,用一把小斧头砍一根木头。他穿着与前几个画面相同的衣服。地面上覆盖着落叶。周围是树木和灌木。", - "narration": "“哎呀,这片灌木叶子滴水如雨,感觉像是大自然的洗发水广告!但我这位‘挖宝达人’似乎更适合拍个‘森林里的单身狗’真人秀。等会儿,我要给树根唱首歌,听说它们爱音乐!”", - "OST": 2, - "new_timestamp": "00:00:49,000-00:01:07,000" - }, - { - "timestamp": "01:36-01:53", - "picture": "好的,以下是视频画面内容的客观描述:\n\n视频包含三个镜头:\n\n**镜头一:**个小型、浅水池塘,位于树林中。池塘的水看起来浑浊,呈绿褐色。池塘周围遍布泥土和落叶。多根树枝和树干横跨池塘,部分浸没在水中。周围的植被茂密主要是深色树木和灌木。\n\n**镜头二:**距拍摄树深处,阳光透过树叶洒落在植被上。镜头中可见粗大的树干、树枝和各种绿叶植物。部分树枝似乎被砍断,切口可见。\n\n**镜头三:**近距离特写镜头,聚焦在树枝和绿叶上。叶片呈圆形,颜色为鲜绿色,有些叶片上有缺损。树枝颜色较深,呈现深褐色。背景是模糊的树林。\n", - "narration": "“好吧,看来我们的‘挖宝达人’终于找到了一‘宝藏’——一个色泽如同绿豆汤的池塘!我敢打赌,这里不仅是小鱼儿的游乐场更是树枝们的‘水疗中心’!下次来这里,我得带上浮潜装备!”", - "OST": 2, - "new_timestamp": "00:01:07,000-00:01:24,000" - } - ] - # 合并子视频 - # combine_clip_videos(combined_video_path=combined_video_path, video_paths=video_paths, video_ost_list=video_ost_list, list_script=list_script) + # 处理字幕文本:确保是字符串,并处理可能的列表情况 + if isinstance(sub.text, (list, tuple)): + subtitle_text = ' '.join(str(item) for item in sub.text if item is not None) + else: + subtitle_text = str(sub.text) - cfg = VideoClipParams() - cfg.video_aspect = VideoAspect.portrait - cfg.font_name = "STHeitiMedium.ttc" - cfg.font_size = 60 - cfg.stroke_color = "#000000" - cfg.stroke_width = 1.5 - cfg.text_fore_color = "#FFFFFF" - cfg.text_background_color = "transparent" - cfg.bgm_type = "random" - cfg.bgm_file = "" - cfg.bgm_volume = 1.0 - cfg.subtitle_enabled = True - 
cfg.subtitle_position = "bottom" - cfg.n_threads = 2 - cfg.video_volume = 1 + subtitle_text = subtitle_text.strip() - cfg.voice_volume = 1.0 + if not subtitle_text: + logger.info(f"警告:第 {index + 1} 条字幕处理后为空,已跳过") + continue - video_path = "../../storage/tasks/123/combined.mp4" - audio_path = "../../storage/tasks/123/final_audio.mp3" - subtitle_path = "../../storage/tasks/123/subtitle.srt" - output_file = "../../storage/tasks/123/final-123.mp4" + # 创建临时 TextClip 来获取文本高度 + temp_clip = TextClip( + subtitle_text, + font=font_path, + fontsize=subtitle_style['fontsize'], + color=subtitle_style['color'] + ) + text_height = temp_clip.h + temp_clip.close() + + # 计算字幕位置 + position = calculate_subtitle_position( + subtitle_style['position'], + video.h, + text_height + ) + + # 创建最终的 TextClip + text_clip = (TextClip( + subtitle_text, + font=font_path, + fontsize=subtitle_style['fontsize'], + color=subtitle_style['color'] + ) + .set_position(position) + .set_duration(end_time - start_time) + .set_start(start_time)) + subtitle_clips.append(text_clip) + + except Exception as e: + logger.error(f"警告:创建第 {index + 1} 条字幕时出错: {traceback.format_exc()}") + + logger.info(f"成功创建 {len(subtitle_clips)} 条字幕剪辑") + except Exception as e: + logger.info(f"警告:处理字幕文件时出错: {str(e)}") + else: + logger.info(f"提示:字幕文件不存在: {subtitle_path}") + + # 合并音频 + audio_clips = [] + + # 添加原声(设置音量) + logger.debug(f"音量配置: {volume_config}") + if video.audio is not None: + original_audio = video.audio.volumex(volume_config['original']) + audio_clips.append(original_audio) + + # 添加BGM(如果提供) + if bgm_path: + bgm = AudioFileClip(bgm_path) + if bgm.duration < video.duration: + bgm = loop_audio_clip(bgm, video.duration) + else: + bgm = bgm.subclip(0, video.duration) + bgm = bgm.volumex(volume_config['bgm']) + audio_clips.append(bgm) + + # 添加解说音频(如果提供) + if narration_path: + narration = AudioFileClip(narration_path).volumex(volume_config['narration']) + audio_clips.append(narration) + + # 合成最终视频(包含字幕) + if subtitle_clips: + final_video = CompositeVideoClip([video] + subtitle_clips, size=video.size) + else: + logger.info("警告:没有字幕被添加到视频中") + final_video = video + + if audio_clips: + final_audio = CompositeAudioClip(audio_clips) + final_video = final_video.set_audio(final_audio) + + # 导出视频 + logger.info("开始导出视频...") # 调试信息 + final_video.write_videofile( + output_path, + codec='libx264', + audio_codec='aac', + fps=video.fps + ) + logger.info(f"视频已导出到: {output_path}") # 调试信息 + + # 清理资源 + video.close() + for clip in subtitle_clips: + clip.close() + if bgm_path: + bgm.close() + if narration_path: + narration.close() - generate_video_v2(video_path=video_path, - audio_path=audio_path, - subtitle_path=subtitle_path, - output_file=output_file, - params=cfg, - list_script=list_script, - ) diff --git a/app/services/voice.py b/app/services/voice.py index 5d6aa99..eba3c6d 100644 --- a/app/services/voice.py +++ b/app/services/voice.py @@ -7,7 +7,6 @@ import asyncio from loguru import logger from typing import List from datetime import datetime -from edge_tts.submaker import mktimestamp from xml.sax.saxutils import unescape from edge_tts import submaker, SubMaker from moviepy.video.tools import subtitles @@ -1199,7 +1198,7 @@ def azure_tts_v2(text: str, voice_name: str, voice_file: str) -> [SubMaker, None except Exception as e: logger.error(f"failed, error: {str(e)}") if i < 2: # 如果不是最后一次重试,则等待1秒 - time.sleep(1) + time.sleep(3) return None @@ -1318,96 +1317,6 @@ def create_subtitle_from_multiple(text: str, sub_maker_list: List[SubMaker], lis traceback.print_exc() 
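[Note on the new entry point] A minimal sketch of how the `generate_video_v3` function added to `app/services/video.py` above is expected to be invoked; this mirrors the call site in `app/services/task.py`, but all file paths and concrete values below are illustrative assumptions, not taken from the repository:

```python
from app.models.schema import SubtitlePosition
from app.services import video

# Illustrative paths/values only; in start_subclip these come from VideoClipParams.
video.generate_video_v3(
    video_path="storage/tasks/demo/combined-1.mp4",
    subtitle_path="storage/tasks/demo/subtitle.srt",     # optional
    bgm_path=None,                                       # optional background music
    narration_path="storage/tasks/demo/final_audio.mp3", # optional TTS narration
    output_path="storage/tasks/demo/final-1.mp4",
    volume_config={
        "original": 0.7,    # original video audio track
        "bgm": 0.3,         # background music
        "narration": 1.0,   # TTS narration
    },
    subtitle_style={
        "fontsize": 36,
        "color": "white",
        "stroke_color": "black",
        "stroke_width": 1.5,
        "bg_color": None,
        "position": SubtitlePosition.BOTTOM,  # or a 0-1 float measured from the top
        "method": "caption",
    },
    font_path="resource/fonts/STHeitiMedium.ttc",
)
```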
-def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str): - """ - 优化字幕文件 - 1. 将字幕文件按照标点符号分割成多行 - 2. 逐行匹配字幕文件中的文本 - 3. 生成新的字幕文件 - """ - - text = _format_text(text) - - def formatter(idx: int, start_time: float, end_time: float, sub_text: str) -> str: - """ - 1 - 00:00:00,000 --> 00:00:02,360 - 跑步是一项简单易行的运动 - """ - start_t = mktimestamp(start_time).replace(".", ",") - end_t = mktimestamp(end_time).replace(".", ",") - return f"{idx}\n" f"{start_t} --> {end_t}\n" f"{sub_text}\n" - - start_time = -1.0 - sub_items = [] - sub_index = 0 - - script_lines = utils.split_string_by_punctuations(text) - - def match_line(_sub_line: str, _sub_index: int): - if len(script_lines) <= _sub_index: - return "" - - _line = script_lines[_sub_index] - if _sub_line == _line: - return script_lines[_sub_index].strip() - - _sub_line_ = re.sub(r"[^\w\s]", "", _sub_line) - _line_ = re.sub(r"[^\w\s]", "", _line) - if _sub_line_ == _line_: - return _line_.strip() - - _sub_line_ = re.sub(r"\W+", "", _sub_line) - _line_ = re.sub(r"\W+", "", _line) - if _sub_line_ == _line_: - return _line.strip() - - return "" - - sub_line = "" - - try: - for _, (offset, sub) in enumerate(zip(sub_maker.offset, sub_maker.subs)): - _start_time, end_time = offset - if start_time < 0: - start_time = _start_time - - sub = unescape(sub) - sub_line += sub - sub_text = match_line(sub_line, sub_index) - if sub_text: - sub_index += 1 - line = formatter( - idx=sub_index, - start_time=start_time, - end_time=end_time, - sub_text=sub_text, - ) - sub_items.append(line) - start_time = -1.0 - sub_line = "" - - if len(sub_items) == len(script_lines): - with open(subtitle_file, "w", encoding="utf-8") as file: - file.write("\n".join(sub_items) + "\n") - try: - sbs = subtitles.file_to_subtitles(subtitle_file, encoding="utf-8") - duration = max([tb for ((ta, tb), txt) in sbs]) - logger.info( - f"completed, subtitle file created: {subtitle_file}, duration: {duration}" - ) - except Exception as e: - logger.error(f"failed, error: {str(e)}") - os.remove(subtitle_file) - else: - logger.warning( - f"failed, sub_items len: {len(sub_items)}, script_lines len: {len(script_lines)}" - ) - - except Exception as e: - logger.error(f"failed, error: {str(e)}") - - def get_audio_duration(sub_maker: submaker.SubMaker): """ 获取音频时长 @@ -1466,20 +1375,3 @@ def tts_multiple(task_id: str, list_script: list, voice_name: str, voice_rate: f logger.info(f"已生成音频文件: {audio_file}") return audio_files, sub_maker_list - - -if __name__ == "__main__": - voice_name = "zh-CN-YunyangNeural" - # voice_name = "af-ZA-AdriNeural" - voice_name = parse_voice_name(voice_name) - print(voice_name) - - with open("../../resource/scripts/2024-1203-205442.json", 'r', encoding='utf-8') as f: - data = json.load(f) - - audio_files, sub_maker_list = tts_multiple(task_id="12312312", list_script=data, voice_name=voice_name, voice_rate=1, voice_pitch=1) - - full_text = " ".join([item['narration'] for item in data if not item['OST']]) - subtitle_file = os.path.join(utils.task_dir("12312312"), "subtitle_multiple.srt") - create_subtitle_from_multiple(full_text, sub_maker_list, data, subtitle_file) - print(f"生成的音频文件列表: {audio_files}") diff --git a/app/utils/utils.py b/app/utils/utils.py index db0d248..49d44be 100644 --- a/app/utils/utils.py +++ b/app/utils/utils.py @@ -117,6 +117,47 @@ def song_dir(sub_dir: str = ""): return d +def get_bgm_file(bgm_type: str = "random", bgm_file: str = ""): + """ + 获取背景音乐文件路径 + Args: + bgm_type: 背景音乐类型,可选值: random(随机), ""(无背景音乐) + bgm_file: 指定的背景音乐文件路径 + + Returns: 
+ str: 背景音乐文件路径 + """ + import glob + import random + if not bgm_type: + return "" + + if bgm_file and os.path.exists(bgm_file): + return bgm_file + + if bgm_type == "random": + song_dir_path = song_dir() + + # 检查目录是否存在 + if not os.path.exists(song_dir_path): + logger.warning(f"背景音乐目录不存在: {song_dir_path}") + return "" + + # 支持 mp3 和 flac 格式 + mp3_files = glob.glob(os.path.join(song_dir_path, "*.mp3")) + flac_files = glob.glob(os.path.join(song_dir_path, "*.flac")) + files = mp3_files + flac_files + + # 检查是否找到音乐文件 + if not files: + logger.warning(f"在目录 {song_dir_path} 中没有找到 MP3 或 FLAC 文件") + return "" + + return random.choice(files) + + return "" + + def public_dir(sub_dir: str = ""): d = resource_dir(f"public") if sub_dir: @@ -339,7 +380,7 @@ def time_to_seconds(time_str: str) -> float: # 分割时间部分 parts = time_part.split(':') - + if len(parts) == 3: # HH:MM:SS h, m, s = map(float, parts) seconds = h * 3600 + m * 60 + s @@ -350,7 +391,7 @@ def time_to_seconds(time_str: str) -> float: seconds = float(parts[0]) return seconds + ms - + except (ValueError, IndexError) as e: logger.error(f"时间格式转换错误 {time_str}: {str(e)}") return 0.0 @@ -373,16 +414,16 @@ def calculate_total_duration(scenes): float: 总时长(秒) """ total_seconds = 0 - + for scene in scenes: start, end = scene['timestamp'].split('-') # 使用 time_to_seconds 函数处理更精确的时间格式 start_seconds = time_to_seconds(start) end_seconds = time_to_seconds(end) - + duration = end_seconds - start_seconds total_seconds += duration - + return total_seconds @@ -502,7 +543,7 @@ def clear_keyframes_cache(video_path: str = None): keyframes_dir = os.path.join(temp_dir(), "keyframes") if not os.path.exists(keyframes_dir): return - + if video_path: # 理指定视频的缓存 video_hash = md5(video_path + str(os.path.getmtime(video_path))) @@ -516,7 +557,7 @@ def clear_keyframes_cache(video_path: str = None): import shutil shutil.rmtree(keyframes_dir) logger.info("已清理所有关键帧缓存") - + except Exception as e: logger.error(f"清理关键帧缓存失败: {e}") @@ -527,15 +568,16 @@ def init_resources(): # 创建字体目录 font_dir = os.path.join(root_dir(), "resource", "fonts") os.makedirs(font_dir, exist_ok=True) - + # 检查字体文件 font_files = [ - ("SourceHanSansCN-Regular.otf", "https://github.com/adobe-fonts/source-han-sans/raw/release/OTF/SimplifiedChinese/SourceHanSansSC-Regular.otf"), + ("SourceHanSansCN-Regular.otf", + "https://github.com/adobe-fonts/source-han-sans/raw/release/OTF/SimplifiedChinese/SourceHanSansSC-Regular.otf"), ("simhei.ttf", "C:/Windows/Fonts/simhei.ttf"), # Windows 黑体 ("simkai.ttf", "C:/Windows/Fonts/simkai.ttf"), # Windows 楷体 ("simsun.ttc", "C:/Windows/Fonts/simsun.ttc"), # Windows 宋体 ] - + # 优先使用系统字体 system_font_found = False for font_name, source in font_files: @@ -547,16 +589,17 @@ def init_resources(): logger.info(f"已复制系统字体: {font_name}") system_font_found = True break - + # 如果没有找到系统字体,则下载思源黑体 if not system_font_found: source_han_path = os.path.join(font_dir, "SourceHanSansCN-Regular.otf") if not os.path.exists(source_han_path): download_font(font_files[0][1], source_han_path) - + except Exception as e: logger.error(f"初始化资源文件失败: {e}") + def download_font(url: str, font_path: str): """下载字体文件""" try: @@ -564,16 +607,17 @@ def download_font(url: str, font_path: str): import requests response = requests.get(url) response.raise_for_status() - + with open(font_path, 'wb') as f: f.write(response.content) - + logger.info(f"字体文件下载成功: {font_path}") - + except Exception as e: logger.error(f"下载字体文件失败: {e}") raise + def init_imagemagick(): """初始化 ImageMagick 配置""" try: @@ -583,10 +627,10 @@ def 
init_imagemagick(): if result.returncode != 0: logger.error("ImageMagick 未安装或配置不正确") return False - + # 设置 IMAGEMAGICK_BINARY 环境变量 os.environ['IMAGEMAGICK_BINARY'] = 'magick' - + return True except Exception as e: logger.error(f"初始化 ImageMagick 失败: {str(e)}") diff --git a/config.example.toml b/config.example.toml index c9702f4..b0bf970 100644 --- a/config.example.toml +++ b/config.example.toml @@ -11,8 +11,13 @@ vision_gemini_api_key = "" vision_gemini_model_name = "gemini-1.5-flash" + ########## Vision Qwen API Key + vision_qwenvl_api_key = "" + vision_qwenvl_model_name = "qwen-vl-max-latest" + vision_qwenvl_base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1" + ########### Vision NarratoAPI Key - narrato_api_key = "" + narrato_api_key = "0N0iEjU77aTqPW4d9YHCmTW2mPrfgWjDmaWAz1lTVTM" narrato_api_url = "https://narratoinsight.scsmtech.cn/api/v1" narrato_vision_model = "gemini-1.5-flash" narrato_vision_key = "" @@ -32,9 +37,7 @@ ########## OpenAI API Key # Get your API key at https://platform.openai.com/api-keys text_openai_api_key = "" - # No need to set it unless you want to use your own proxy - text_openai_base_url = "" - # Check your available models at https://platform.openai.com/account/limits + text_openai_base_url = "https://api.openai.com/v1" text_openai_model_name = "gpt-4o-mini" ########## Moonshot API Key @@ -66,7 +69,8 @@ # https://tongyi.aliyun.com/qianwen/ # https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction text_qwen_api_key = "" - text_qwen_model_name = "qwen-max" + text_qwen_model_name = "qwen-plus-1127" + text_qwen_base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1" ########## DeepSeek API Key # Visit https://platform.deepseek.com/api_keys to get your API key diff --git a/requirements.txt b/requirements.txt index f98c399..55c7972 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,6 @@ requests~=2.31.0 moviepy==2.0.0.dev2 faster-whisper~=1.0.1 -edge_tts~=6.1.15 uvicorn~=0.27.1 fastapi~=0.115.4 tomli~=2.0.1 @@ -35,3 +34,5 @@ tiktoken==0.8.0 yt-dlp==2024.11.18 pysrt==1.1.2 httpx==0.27.2 +transformers==4.47.0 +edge-tts==6.1.19 diff --git a/webui/components/audio_settings.py b/webui/components/audio_settings.py index f81effe..6e378d9 100644 --- a/webui/components/audio_settings.py +++ b/webui/components/audio_settings.py @@ -6,23 +6,25 @@ from app.services import voice from app.utils import utils from webui.utils.cache import get_songs_cache + def render_audio_panel(tr): """渲染音频设置面板""" with st.container(border=True): st.write(tr("Audio Settings")) - + # 渲染TTS设置 render_tts_settings(tr) - + # 渲染背景音乐设置 render_bgm_settings(tr) + def render_tts_settings(tr): """渲染TTS(文本转语音)设置""" # 获取支持的语音列表 support_locales = ["zh-CN"] voices = voice.get_all_azure_voices(filter_locals=support_locales) - + # 创建友好的显示名称 friendly_names = { v: v.replace("Female", tr("Female")) @@ -30,11 +32,11 @@ def render_tts_settings(tr): .replace("Neural", "") for v in voices } - + # 获取保存的语音设置 saved_voice_name = config.ui.get("voice_name", "") saved_voice_name_index = 0 - + if saved_voice_name in friendly_names: saved_voice_name_index = list(friendly_names.keys()).index(saved_voice_name) else: @@ -56,7 +58,7 @@ def render_tts_settings(tr): voice_name = list(friendly_names.keys())[ list(friendly_names.values()).index(selected_friendly_name) ] - + # 保存设置 config.ui["voice_name"] = voice_name @@ -70,34 +72,40 @@ def render_tts_settings(tr): # 试听按钮 render_voice_preview(tr, voice_name) + def render_azure_v2_settings(tr): """渲染Azure V2语音设置""" 
saved_azure_speech_region = config.azure.get("speech_region", "") saved_azure_speech_key = config.azure.get("speech_key", "") - + azure_speech_region = st.text_input( - tr("Speech Region"), + tr("Speech Region"), value=saved_azure_speech_region ) azure_speech_key = st.text_input( - tr("Speech Key"), - value=saved_azure_speech_key, + tr("Speech Key"), + value=saved_azure_speech_key, type="password" ) - + config.azure["speech_region"] = azure_speech_region config.azure["speech_key"] = azure_speech_key + def render_voice_parameters(tr): """渲染语音参数设置""" # 音量 - voice_volume = st.selectbox( + voice_volume = st.slider( tr("Speech Volume"), - options=[0.6, 0.8, 1.0, 1.2, 1.5, 2.0, 3.0, 4.0, 5.0], - index=2, + min_value=0.0, + max_value=1.0, + value=1.0, + step=0.01, + help=tr("Adjust the volume of the narration audio") ) st.session_state['voice_volume'] = voice_volume + # 语速 voice_rate = st.selectbox( tr("Speech Rate"), @@ -114,6 +122,7 @@ def render_voice_parameters(tr): ) st.session_state['voice_pitch'] = voice_pitch + def render_voice_preview(tr, voice_name): """渲染语音试听功能""" if st.button(tr("Play Voice")): @@ -122,11 +131,11 @@ def render_voice_preview(tr, voice_name): play_content = st.session_state.get('video_script', '') if not play_content: play_content = tr("Voice Example") - + with st.spinner(tr("Synthesizing Voice")): temp_dir = utils.storage_dir("temp", create=True) audio_file = os.path.join(temp_dir, f"tmp-voice-{str(uuid4())}.mp3") - + sub_maker = voice.tts( text=play_content, voice_name=voice_name, @@ -134,7 +143,7 @@ def render_voice_preview(tr, voice_name): voice_pitch=st.session_state.get('voice_pitch', 1.0), voice_file=audio_file, ) - + # 如果语音文件生成失败,使用默认内容重试 if not sub_maker: play_content = "This is a example voice. if you hear this, the voice synthesis failed with the original content."
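[Note on the volume sliders] The narration, original-track and BGM volumes set in this panel ultimately feed the `volume_config` dict consumed by `generate_video_v3`. A sketch of that mapping, using a hypothetical helper name (`build_volume_config` does not exist in the codebase; the dict is built inline in `start_subclip`):

```python
from app.models.schema import VideoClipParams

def build_volume_config(params: VideoClipParams) -> dict:
    """Hypothetical helper mirroring the inline dict in start_subclip()."""
    return {
        "original": params.original_volume,                     # video's own audio track
        "bgm": params.bgm_volume,                               # background music
        "narration": params.tts_volume or params.voice_volume,  # TTS narration
    }
```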
@@ -151,6 +160,7 @@ def render_voice_preview(tr, voice_name): if os.path.exists(audio_file): os.remove(audio_file) + def render_bgm_settings(tr): """渲染背景音乐设置""" # 背景音乐选项 @@ -159,14 +169,14 @@ def render_bgm_settings(tr): (tr("Random Background Music"), "random"), (tr("Custom Background Music"), "custom"), ] - + selected_index = st.selectbox( tr("Background Music"), index=1, options=range(len(bgm_options)), format_func=lambda x: bgm_options[x][0], ) - + # 获取选择的背景音乐类型 bgm_type = bgm_options[selected_index][1] st.session_state['bgm_type'] = bgm_type @@ -176,15 +186,19 @@ def render_bgm_settings(tr): custom_bgm_file = st.text_input(tr("Custom Background Music File")) if custom_bgm_file and os.path.exists(custom_bgm_file): st.session_state['bgm_file'] = custom_bgm_file - + # 背景音乐音量 - bgm_volume = st.selectbox( + bgm_volume = st.slider( tr("Background Music Volume"), - options=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0], - index=2, + min_value=0.0, + max_value=1.0, + value=0.3, + step=0.01, + help=tr("Adjust the volume of the background music") ) st.session_state['bgm_volume'] = bgm_volume + def get_audio_params(): """获取音频参数""" return { @@ -194,5 +208,5 @@ def get_audio_params(): 'voice_pitch': st.session_state.get('voice_pitch', 1.0), 'bgm_type': st.session_state.get('bgm_type', 'random'), 'bgm_file': st.session_state.get('bgm_file', ''), - 'bgm_volume': st.session_state.get('bgm_volume', 0.2), - } \ No newline at end of file + 'bgm_volume': st.session_state.get('bgm_volume', 0.3), + } diff --git a/webui/components/basic_settings.py b/webui/components/basic_settings.py index d7b5144..5b00009 100644 --- a/webui/components/basic_settings.py +++ b/webui/components/basic_settings.py @@ -149,6 +149,7 @@ def test_vision_model_connection(api_key, base_url, model_name, provider, tr): else: return False, f"{tr('Unsupported provider')}: {provider}" + def render_vision_llm_settings(tr): """渲染视频分析模型设置""" st.subheader(tr("Vision Model Settings")) @@ -196,7 +197,7 @@ def render_vision_llm_settings(tr): elif vision_provider == 'qwenvl': st_vision_base_url = st.text_input( tr("Vision Base URL"), - value=vision_base_url or "https://dashscope.aliyuncs.com/compatible-mode/v1", + value=vision_base_url, help=tr("Default: https://dashscope.aliyuncs.com/compatible-mode/v1") ) st_vision_model_name = st.text_input( diff --git a/webui/components/script_settings.py b/webui/components/script_settings.py index 23a0c53..db09908 100644 --- a/webui/components/script_settings.py +++ b/webui/components/script_settings.py @@ -2,12 +2,15 @@ import os import glob import json import time +import traceback import streamlit as st +from loguru import logger from app.config import config from app.models.schema import VideoClipParams from app.utils import utils, check_script from webui.tools.generate_script_docu import generate_script_docu +from webui.tools.generate_script_short import generate_script_short def render_script_panel(tr): @@ -34,6 +37,7 @@ def render_script_file(tr, params): script_list = [ (tr("None"), ""), (tr("Auto Generate"), "auto"), + (tr("Short Generate"), "short"), (tr("Upload Script"), "upload_script") # 新增上传脚本选项 ] @@ -216,7 +220,9 @@ def render_script_buttons(tr, params): script_path = st.session_state.get('video_clip_json_path', '') if script_path == "auto": button_name = tr("Generate Video Script") - elif script_path: + elif script_path == "short": + button_name = tr("Generate Short Video Script") + elif script_path.endswith("json"): button_name = tr("Load Video Script") else: button_name = tr("Please 
Select Script File") @@ -224,6 +230,8 @@ def render_script_buttons(tr, params): if st.button(button_name, key="script_action", disabled=not script_path): if script_path == "auto": generate_script_docu(tr, params) + elif script_path == "short": + generate_script_short(tr, params) else: load_script(tr, script_path) @@ -275,6 +283,7 @@ def load_script(tr, script_path): st.success(tr("Script loaded successfully")) st.rerun() except Exception as e: + logger.error(f"加载脚本文件时发生错误\n{traceback.format_exc()}") st.error(f"{tr('Failed to load script')}: {str(e)}") @@ -332,3 +341,14 @@ def crop_video(tr, params): time.sleep(2) progress_bar.empty() status_text.empty() + + +def get_script_params(): + """获取脚本参数""" + return { + 'video_language': st.session_state.get('video_language', ''), + 'video_clip_json_path': st.session_state.get('video_clip_json_path', ''), + 'video_origin_path': st.session_state.get('video_origin_path', ''), + 'video_name': st.session_state.get('video_name', ''), + 'video_plot': st.session_state.get('video_plot', '') + } diff --git a/webui/components/subtitle_settings.py b/webui/components/subtitle_settings.py index ba9d2c7..cb624dc 100644 --- a/webui/components/subtitle_settings.py +++ b/webui/components/subtitle_settings.py @@ -3,26 +3,28 @@ from app.config import config from webui.utils.cache import get_fonts_cache import os + def render_subtitle_panel(tr): """渲染字幕设置面板""" with st.container(border=True): st.write(tr("Subtitle Settings")) - + # 启用字幕选项 enable_subtitles = st.checkbox(tr("Enable Subtitles"), value=True) st.session_state['subtitle_enabled'] = enable_subtitles - + if enable_subtitles: render_font_settings(tr) render_position_settings(tr) render_style_settings(tr) + def render_font_settings(tr): """渲染字体设置""" # 获取字体列表 font_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "resource", "fonts") font_names = get_fonts_cache(font_dir) - + # 获取保存的字体设置 saved_font_name = config.ui.get("font_name", "") saved_font_name_index = 0 @@ -38,7 +40,7 @@ def render_font_settings(tr): config.ui["font_name"] = font_name st.session_state['font_name'] = font_name - # 字体大小 + # 字体大小 和 字幕大小 font_cols = st.columns([0.3, 0.7]) with font_cols[0]: saved_text_fore_color = config.ui.get("text_fore_color", "#FFFFFF") @@ -53,13 +55,14 @@ def render_font_settings(tr): saved_font_size = config.ui.get("font_size", 60) font_size = st.slider( tr("Font Size"), - min_value=30, + min_value=20, max_value=100, value=saved_font_size ) config.ui["font_size"] = font_size st.session_state['font_size'] = font_size + def render_position_settings(tr): """渲染位置设置""" subtitle_positions = [ @@ -68,14 +71,14 @@ def render_position_settings(tr): (tr("Bottom"), "bottom"), (tr("Custom"), "custom"), ] - + selected_index = st.selectbox( tr("Position"), index=2, options=range(len(subtitle_positions)), format_func=lambda x: subtitle_positions[x][0], ) - + subtitle_position = subtitle_positions[selected_index][1] st.session_state['subtitle_position'] = subtitle_position @@ -94,27 +97,29 @@ def render_position_settings(tr): except ValueError: st.error(tr("Please enter a valid number")) + def render_style_settings(tr): """渲染样式设置""" stroke_cols = st.columns([0.3, 0.7]) - + with stroke_cols[0]: stroke_color = st.color_picker( tr("Stroke Color"), value="#000000" ) st.session_state['stroke_color'] = stroke_color - + with stroke_cols[1]: stroke_width = st.slider( tr("Stroke Width"), min_value=0.0, max_value=10.0, - value=1.5, - step=0.1 + value=1.0, + step=0.01 ) st.session_state['stroke_width'] = stroke_width 
+ def get_subtitle_params(): """获取字幕参数""" return { @@ -126,4 +131,4 @@ def get_subtitle_params(): 'custom_position': st.session_state.get('custom_position', 70.0), 'stroke_color': st.session_state.get('stroke_color', '#000000'), 'stroke_width': st.session_state.get('stroke_width', 1.5), - } \ No newline at end of file + } diff --git a/webui/components/video_settings.py b/webui/components/video_settings.py index 7942bee..8a9b3f4 100644 --- a/webui/components/video_settings.py +++ b/webui/components/video_settings.py @@ -1,6 +1,7 @@ import streamlit as st from app.models.schema import VideoClipParams, VideoAspect + def render_video_panel(tr): """渲染视频配置面板""" with st.container(border=True): @@ -8,6 +9,7 @@ def render_video_panel(tr): params = VideoClipParams() render_video_config(tr, params) + def render_video_config(tr, params): """渲染视频配置""" # 视频比例 @@ -39,9 +41,22 @@ def render_video_config(tr, params): ) st.session_state['video_quality'] = video_qualities[quality_index][1] + # 原声音量 + params.original_volume = st.slider( + tr("Original Volume"), + min_value=0.0, + max_value=1.0, + value=0.7, + step=0.01, + help=tr("Adjust the volume of the original audio") + ) + st.session_state['original_volume'] = params.original_volume + + def get_video_params(): """获取视频参数""" return { 'video_aspect': st.session_state.get('video_aspect', VideoAspect.portrait.value), - 'video_quality': st.session_state.get('video_quality', '1080p') - } \ No newline at end of file + 'video_quality': st.session_state.get('video_quality', '1080p'), + 'original_volume': st.session_state.get('original_volume', 0.7) + } diff --git a/webui/i18n/zh.json b/webui/i18n/zh.json index 428fa8c..03169de 100644 --- a/webui/i18n/zh.json +++ b/webui/i18n/zh.json @@ -2,13 +2,12 @@ "Language": "简体中文", "Translation": { "Video Script Configuration": "**视频脚本配置**", - "Generate Video Script": "生成视频脚本", + "Generate Video Script": "AI生成画面解说脚本", "Video Subject": "视频主题(给定一个关键词,:red[AI自动生成]视频文案)", "Script Language": "生成视频脚本的语言(一般情况AI会自动根据你输入的主题语言输出)", "Script Files": "脚本文件", "Generate Video Script and Keywords": "点击使用AI根据**主题**生成 【视频文案】 和 【视频关键词】", "Auto Detect": "自动检测", - "Auto Generate": "自动生成", "Video Theme": "视频主题", "Generation Prompt": "自定义提示词", "Save Script": "保存脚本", @@ -188,6 +187,13 @@ "Transcription Failed": "转录失败", "Mergeable Files": "可合并文件数", "Subtitle Content": "字幕内容", - "Merge Result Preview": "合并结果预览" + "Merge Result Preview": "合并结果预览", + "Short Generate": "短剧混剪 (高燃剪辑, 当前只支持 gpt-4o 模型)", + "Generate Short Video Script": "AI生成短剧混剪脚本", + "Adjust the volume of the original audio": "调整原始音频的音量", + "Adjust the volume of the narration audio": "调整解说语音的音量", + "Adjust the volume of the background music": "调整背景音乐的音量", + "Original Volume": "视频音量", + "Auto Generate": "纪录片解说 (画面解说)" } } diff --git a/webui/tools/base.py b/webui/tools/base.py index d6dc3de..06b749a 100644 --- a/webui/tools/base.py +++ b/webui/tools/base.py @@ -1,7 +1,11 @@ import os +import requests import streamlit as st from loguru import logger +from requests.adapters import HTTPAdapter +from urllib3.util.retry import Retry +from app.config import config from app.utils import gemini_analyzer, qwenvl_analyzer @@ -31,17 +35,6 @@ def create_vision_analyzer(provider, api_key, model, base_url): raise ValueError(f"不支持的视觉分析提供商: {provider}") -def get_script_params(): - """获取脚本参数""" - return { - 'video_language': st.session_state.get('video_language', ''), - 'video_clip_json_path': st.session_state.get('video_clip_json_path', ''), - 'video_origin_path': st.session_state.get('video_origin_path', ''), - 'video_name': st.session_state.get('video_name', ''), - 'video_plot': st.session_state.get('video_plot', '') - }
+    session = requests.Session()
+    retry_strategy = Retry(
+        total=3,
+        backoff_factor=1,
+        status_forcelist=[500, 502, 503, 504]
+    )
+    adapter = HTTPAdapter(max_retries=retry_strategy)
+    session.mount("https://", adapter)
+    try:
+        session.post(
+            f"{config.app.get('narrato_api_url')}/video/config",
+            headers=headers,
+            json=video_params,
+            timeout=30,
+            verify=True
+        )
+        return True
+    except Exception as e:
+        # 上报失败不阻断主流程,仅记录告警
+        logger.warning(f"视频配置检查失败: {e}")
+        return False
diff --git a/webui/tools/generate_script_docu.py b/webui/tools/generate_script_docu.py
index 2c72500..6552ebf 100644
--- a/webui/tools/generate_script_docu.py
+++ b/webui/tools/generate_script_docu.py
@@ -13,7 +13,7 @@ from urllib3.util.retry import Retry
 from app.config import config
 from app.utils.script_generator import ScriptProcessor
 from app.utils import utils, video_processor, video_processor_v2, qwenvl_analyzer
-from webui.tools.base import create_vision_analyzer, get_batch_files, get_batch_timestamps
+from webui.tools.base import create_vision_analyzer, get_batch_files, get_batch_timestamps, check_video_config
 
 
 def generate_script_docu(tr, params):
@@ -117,8 +117,7 @@ def generate_script_docu(tr, params):
     elif vision_llm_provider == 'qwenvl':
         vision_api_key = st.session_state.get('vision_qwenvl_api_key')
         vision_model = st.session_state.get('vision_qwenvl_model_name', 'qwen-vl-max-latest')
-        vision_base_url = st.session_state.get('vision_qwenvl_base_url',
-                                               'https://dashscope.aliyuncs.com/compatible-mode/v1')
+        vision_base_url = st.session_state.get('vision_qwenvl_base_url')
     else:
         raise ValueError(f"不支持的视觉分析提供商: {vision_llm_provider}")
 
@@ -228,28 +227,7 @@ def generate_script_docu(tr, params):
             "text_model_name": text_model,
             "text_base_url": text_base_url or ""
         }
-        headers = {
-            'accept': 'application/json',
-            'Content-Type': 'application/json'
-        }
-        session = requests.Session()
-        retry_strategy = Retry(
-            total=3,
-            backoff_factor=1,
-            status_forcelist=[500, 502, 503, 504]
-        )
-        adapter = HTTPAdapter(max_retries=retry_strategy)
-        session.mount("https://", adapter)
-        try:
-            response = session.post(
-                f"{config.app.get('narrato_api_url')}/video/config",
-                headers=headers,
-                json=api_params,
-                timeout=30,
-                verify=True
-            )
-        except Exception as e:
-            pass
+        check_video_config(api_params)
         custom_prompt = st.session_state.get('custom_prompt', '')
         processor = ScriptProcessor(
             model_name=text_model,
diff --git a/webui/tools/generate_script_short.py b/webui/tools/generate_script_short.py
new file mode 100644
index 0000000..5400ff1
--- /dev/null
+++ b/webui/tools/generate_script_short.py
@@ -0,0 +1,88 @@
+import os
+import json
+import time
+import traceback
+import streamlit as st
+from loguru import logger
+
+from app.config import config
+from webui.tools.base import check_video_config
+
+
+def generate_script_short(tr, params):
+    """
+    生成 短剧混剪 视频脚本
+    """
+    progress_bar = st.progress(0)
+    status_text = st.empty()
+
+    def update_progress(progress: float, message: str = ""):
+        progress_bar.progress(progress)
+        if message:
+            status_text.text(f"{progress}% - {message}")
+        else:
+            status_text.text(f"进度: {progress}%")
+
+    try:
+        with st.spinner("正在生成脚本..."):
+            text_provider = config.app.get('text_llm_provider', 'gemini').lower()
+            text_api_key = config.app.get(f'text_{text_provider}_api_key')
+            text_model = config.app.get(f'text_{text_provider}_model_name')
+            text_base_url = config.app.get(f'text_{text_provider}_base_url')
+            vision_api_key = st.session_state.get(f'vision_{text_provider}_api_key', "")
+            vision_model = st.session_state.get(f'vision_{text_provider}_model_name', "")
+            vision_base_url = st.session_state.get(f'vision_{text_provider}_base_url', "")
+            narrato_api_key = config.app.get('narrato_api_key')
+
+            update_progress(20, "开始准备生成脚本")
+
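+            # 按项目目录约定,从视频路径推导同名字幕文件路径,
+            # 例如(示例文件名):resource/videos/demo.mp4 -> resource/srt/demo.srt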
else: + status_text.text(f"进度: {progress}%") + + try: + with st.spinner("正在生成脚本..."): + text_provider = config.app.get('text_llm_provider', 'gemini').lower() + text_api_key = config.app.get(f'text_{text_provider}_api_key') + text_model = config.app.get(f'text_{text_provider}_model_name') + text_base_url = config.app.get(f'text_{text_provider}_base_url') + vision_api_key = st.session_state.get(f'vision_{text_provider}_api_key', "") + vision_model = st.session_state.get(f'vision_{text_provider}_model_name', "") + vision_base_url = st.session_state.get(f'vision_{text_provider}_base_url', "") + narrato_api_key = config.app.get('narrato_api_key') + + update_progress(20, "开始准备生成脚本") + + srt_path = params.video_origin_path.replace(".mp4", ".srt").replace("videos", "srt").replace("video", "subtitle") + if not os.path.exists(srt_path): + logger.error(f"{srt_path} 文件不存在请检查或重新转录") + st.error(f"{srt_path} 文件不存在请检查或重新转录") + st.stop() + + api_params = { + "vision_api_key": vision_api_key, + "vision_model_name": vision_model, + "vision_base_url": vision_base_url or "", + "text_api_key": text_api_key, + "text_model_name": text_model, + "text_base_url": text_base_url or "" + } + chekc_video_config(api_params) + from app.services.SDP.generate_script_short import generate_script + script = generate_script( + srt_path=srt_path, + output_path="resource/scripts/merged_subtitle.json", + api_key=text_api_key, + model_name=text_model, + base_url=text_base_url, + narrato_api_key=narrato_api_key, + bert_path="app/models/bert/", + ) + + if script is None: + st.error("生成脚本失败,请检查日志") + st.stop() + logger.info(f"脚本生成完成 {json.dumps(script, ensure_ascii=False, indent=4)}") + if isinstance(script, list): + st.session_state['video_clip_json'] = script + elif isinstance(script, str): + st.session_state['video_clip_json'] = json.loads(script) + update_progress(80, "脚本生成完成") + + time.sleep(0.1) + progress_bar.progress(100) + status_text.text("脚本生成完成!") + st.success("视频脚本生成成功!") + + except Exception as err: + progress_bar.progress(100) + st.error(f"生成过程中发生错误: {str(err)}") + logger.exception(f"生成脚本时发生错误\n{traceback.format_exc()}")