From f00880436934da4d7bab602053e5176711f68a2f Mon Sep 17 00:00:00 2001
From: linyqh <linyqemail@163.com>
Date: Wed, 20 Nov 2024 00:34:11 +0800
Subject: [PATCH] =?UTF-8?q?feat(subtitle):=20=E6=B7=BB=E5=8A=A0=E4=BB=8E?=
 =?UTF-8?q?=E8=A7=86=E9=A2=91=E6=8F=90=E5=8F=96=E9=9F=B3=E9=A2=91=E5=B9=B6?=
 =?UTF-8?q?=E7=94=9F=E6=88=90=E5=AD=97=E5=B9=95=E7=9A=84=E5=8A=9F=E8=83=BD?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- 新增 extract_audio_and_create_subtitle 函数，用于从视频文件中提取音频并生成字幕文件
- 更新 video_pipeline.py：视频路径自动补全 .mp4 扩展名，download_video 改用 url 参数，并调整示例运行参数
---
 app/services/subtitle.py | 95 ++++++++++++++++++++++++++++++++--------
 video_pipeline.py        | 14 +++---
 2 files changed, 84 insertions(+), 25 deletions(-)

diff --git a/app/services/subtitle.py b/app/services/subtitle.py
index f37eb65..7b18e8d 100644
--- a/app/services/subtitle.py
+++ b/app/services/subtitle.py
@@ -8,6 +8,8 @@ from faster_whisper import WhisperModel
 from timeit import default_timer as timer
 from loguru import logger
 import google.generativeai as genai
+from moviepy.editor import VideoFileClip
+import os
 
 from app.config import config
 from app.utils import utils
@@ -362,29 +364,86 @@ def create_with_gemini(audio_file: str, subtitle_file: str = "", api_key: Option
         return None
 
 
+def extract_audio_and_create_subtitle(video_file: str, subtitle_file: str = "") -> Optional[str]:
+    """
+    Extract the audio track of a video file and generate a subtitle file from it.
+
+    The audio is written to a temporary "<video name>_audio.wav" next to the video,
+    passed to create(), and removed again whether or not processing succeeds.
+
+    Args:
+        video_file: Path to the source video file.
+        subtitle_file: Output subtitle path (optional). Defaults to
+            "<video name>.srt" in the same directory as the video.
+
+    Returns:
+        The subtitle file path, or None if any step fails (the error is logged).
+    """
+    audio_file = ""  # set once the path is derived; lets cleanup run on any exit
+    try:
+        video_dir = os.path.dirname(video_file)
+        video_name = os.path.splitext(os.path.basename(video_file))[0]
+        audio_file = os.path.join(video_dir, f"{video_name}_audio.wav")
+        if not subtitle_file:
+            subtitle_file = os.path.join(video_dir, f"{video_name}.srt")
+
+        logger.info(f"开始从视频提取音频: {video_file}")
+        video = VideoFileClip(video_file)
+        try:
+            # A clip without an audio track has audio=None; report it clearly.
+            if video.audio is None:
+                logger.error(f"视频文件不包含音频轨道: {video_file}")
+                return None
+            logger.info(f"正在提取音频到: {audio_file}")
+            video.audio.write_audiofile(audio_file, codec='pcm_s16le')
+        finally:
+            # Always release the clip's readers, even when extraction fails.
+            video.close()
+
+        logger.info("音频提取完成，开始生成字幕")
+        create(audio_file, subtitle_file)
+        return subtitle_file
+    except Exception as e:
+        # Boundary handler: callers receive None rather than an exception.
+        logger.error(f"处理视频文件时出错: {str(e)}")
+        logger.error(traceback.format_exc())
+        return None
+    finally:
+        # Remove the temporary audio file on success and on failure alike.
+        try:
+            if audio_file and os.path.exists(audio_file):
+                os.remove(audio_file)
+                logger.info("已清理临时音频文件")
+        except OSError as e:
+            logger.warning(f"清理临时音频文件失败: {str(e)}")
+
+
 if __name__ == "__main__":
-    task_id = "test456"
+    task_id = "12121"
     task_dir = utils.task_dir(task_id)
     subtitle_file = f"{task_dir}/subtitle.srt"
     audio_file = f"{task_dir}/audio.wav"
+    video_file = f"{task_dir}/duanju_demo.mp4"
 
-    subtitles = file_to_subtitles(subtitle_file)
-    print(subtitles)
+    extract_audio_and_create_subtitle(video_file, subtitle_file)
 
-    # script_file = f"{task_dir}/script.json"
-    # with open(script_file, "r") as f:
-    #     script_content = f.read()
-    # s = json.loads(script_content)
-    # script = s.get("script")
-    #
-    # correct(subtitle_file, script)
+    # subtitles = file_to_subtitles(subtitle_file)
+    # print(subtitles)
 
-    subtitle_file = f"{task_dir}/subtitle111.srt"
-    create(audio_file, subtitle_file)
+    # # script_file = f"{task_dir}/script.json"
+    # # with open(script_file, "r") as f:
+    # #     script_content = f.read()
+    # # s = json.loads(script_content)
+    # # script = s.get("script")
+    # #
+    # # correct(subtitle_file, script)
 
-    # # 使用Gemini模型处理音频
-    # gemini_api_key = config.app.get("gemini_api_key")  # 请替换为实际的API密钥
-    # gemini_subtitle_file = create_with_gemini(audio_file, api_key=gemini_api_key)
-    #
-    # if gemini_subtitle_file:
-    #     print(f"Gemini生成的字幕文件: {gemini_subtitle_file}")
+    # subtitle_file = f"{task_dir}/subtitle111.srt"
+    # create(audio_file, subtitle_file)
+
+    # # # Process the audio with the Gemini model
+    # # gemini_api_key = config.app.get("gemini_api_key")  # replace with a real API key
+    # # gemini_subtitle_file = create_with_gemini(audio_file, api_key=gemini_api_key)
+    # #
+    # # if gemini_subtitle_file:
+    # #     print(f"Gemini生成的字幕文件: {gemini_subtitle_file}")
diff --git a/video_pipeline.py b/video_pipeline.py
index 3e35544..5dca576 100644
--- a/video_pipeline.py
+++ b/video_pipeline.py
@@ -110,12 +110,12 @@ class VideoPipeline:
         """运行完整的pipeline"""
         try:
             current_path = os.path.dirname(os.path.abspath(__file__))
-            video_path = os.path.join(current_path, "resource", "videos", video_name)
+            video_path = os.path.join(current_path, "resource", "videos", f"{video_name}.mp4")
             # 判断视频是否存在
             if not os.path.exists(video_path):
                 # 1. 下载视频
                 print(f"视频不存在, 开始下载视频: {video_path}")
-                download_result = self.download_video(youtube_url=youtube_url, resolution="1080p", output_format="mp4", rename=video_name)
+                download_result = self.download_video(url=youtube_url, resolution="1080p", output_format="mp4", rename=video_name)
                 video_path = download_result["output_path"]
             else:
                 print(f"视频已存在: {video_path}")
@@ -168,12 +168,12 @@ class VideoPipeline:
 if __name__ == "__main__":
     pipeline = VideoPipeline()
     result = pipeline.run_pipeline(
-        task_id="test_123",
+        task_id="test_111901",
         script_name="test.json",
-        youtube_url="https://www.youtube.com/watch?v=Kenm35gdqtk",
-        video_name="test.mp4",
-        skip_seconds=0,
-        threshold=30,
+        youtube_url="https://www.youtube.com/watch?v=vLJ7Yed6FQ4",
+        video_name="2024-11-19-01",
+        skip_seconds=50,
+        threshold=35,
         vision_batch_size=10,
         vision_llm_provider="gemini",
         voice_name="zh-CN-YunjianNeural",