From 8eb961bc0ef4a4b3f1169a42d1d4cdb6ffefdd7d Mon Sep 17 00:00:00 2001 From: linyq Date: Mon, 18 Nov 2024 16:30:00 +0800 Subject: [PATCH 1/9] =?UTF-8?q?feat(app):=20=E6=96=B0=E5=A2=9E=E8=84=9A?= =?UTF-8?q?=E6=9C=AC=E7=94=9F=E6=88=90=20V2=20=E6=8E=A5=E5=8F=A3=E5=B9=B6?= =?UTF-8?q?=E9=87=8D=E6=9E=84=E7=9B=B8=E5=85=B3=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 新增 V2脚本生成接口和相关服务 - 重构脚本生成逻辑,提高可维护性和可扩展性 - 优化关键帧提取和处理流程 - 改进错误处理和日志记录 --- app/controllers/v1/video.py | 210 +++++++++--------- app/controllers/v2/base.py | 11 + app/controllers/v2/script.py | 45 ++++ app/models/schema_v2.py | 15 ++ app/router.py | 4 + app/services/script_service.py | 378 +++++++++++++++++++++++++++++++++ webui/i18n/zh.json | 1 - 7 files changed, 558 insertions(+), 106 deletions(-) create mode 100644 app/controllers/v2/base.py create mode 100644 app/controllers/v2/script.py create mode 100644 app/models/schema_v2.py create mode 100644 app/services/script_service.py diff --git a/app/controllers/v1/video.py b/app/controllers/v1/video.py index 0430707..336084f 100644 --- a/app/controllers/v1/video.py +++ b/app/controllers/v1/video.py @@ -163,109 +163,109 @@ def delete_video(request: Request, task_id: str = Path(..., description="Task ID ) -@router.get( - "/musics", response_model=BgmRetrieveResponse, summary="Retrieve local BGM files" -) -def get_bgm_list(request: Request): - suffix = "*.mp3" - song_dir = utils.song_dir() - files = glob.glob(os.path.join(song_dir, suffix)) - bgm_list = [] - for file in files: - bgm_list.append( - { - "name": os.path.basename(file), - "size": os.path.getsize(file), - "file": file, - } - ) - response = {"files": bgm_list} - return utils.get_response(200, response) +# @router.get( +# "/musics", response_model=BgmRetrieveResponse, summary="Retrieve local BGM files" +# ) +# def get_bgm_list(request: Request): +# suffix = "*.mp3" +# song_dir = utils.song_dir() +# files = glob.glob(os.path.join(song_dir, suffix)) +# bgm_list = [] +# for file in files: +# bgm_list.append( +# { +# "name": os.path.basename(file), +# "size": os.path.getsize(file), +# "file": file, +# } +# ) +# response = {"files": bgm_list} +# return utils.get_response(200, response) +# - -@router.post( - "/musics", - response_model=BgmUploadResponse, - summary="Upload the BGM file to the songs directory", -) -def upload_bgm_file(request: Request, file: UploadFile = File(...)): - request_id = base.get_task_id(request) - # check file ext - if file.filename.endswith("mp3"): - song_dir = utils.song_dir() - save_path = os.path.join(song_dir, file.filename) - # save file - with open(save_path, "wb+") as buffer: - # If the file already exists, it will be overwritten - file.file.seek(0) - buffer.write(file.file.read()) - response = {"file": save_path} - return utils.get_response(200, response) - - raise HttpException( - "", status_code=400, message=f"{request_id}: Only *.mp3 files can be uploaded" - ) - - -@router.get("/stream/{file_path:path}") -async def stream_video(request: Request, file_path: str): - tasks_dir = utils.task_dir() - video_path = os.path.join(tasks_dir, file_path) - range_header = request.headers.get("Range") - video_size = os.path.getsize(video_path) - start, end = 0, video_size - 1 - - length = video_size - if range_header: - range_ = range_header.split("bytes=")[1] - start, end = [int(part) if part else None for part in range_.split("-")] - if start is None: - start = video_size - end - end = video_size - 1 - if end is None: - end = video_size - 1 - length = end - start + 1 - - def file_iterator(file_path, offset=0, bytes_to_read=None): - with open(file_path, "rb") as f: - f.seek(offset, os.SEEK_SET) - remaining = bytes_to_read or video_size - while remaining > 0: - bytes_to_read = min(4096, remaining) - data = f.read(bytes_to_read) - if not data: - break - remaining -= len(data) - yield data - - response = StreamingResponse( - file_iterator(video_path, start, length), media_type="video/mp4" - ) - response.headers["Content-Range"] = f"bytes {start}-{end}/{video_size}" - response.headers["Accept-Ranges"] = "bytes" - response.headers["Content-Length"] = str(length) - response.status_code = 206 # Partial Content - - return response - - -@router.get("/download/{file_path:path}") -async def download_video(_: Request, file_path: str): - """ - download video - :param _: Request request - :param file_path: video file path, eg: /cd1727ed-3473-42a2-a7da-4faafafec72b/final-1.mp4 - :return: video file - """ - tasks_dir = utils.task_dir() - video_path = os.path.join(tasks_dir, file_path) - file_path = pathlib.Path(video_path) - filename = file_path.stem - extension = file_path.suffix - headers = {"Content-Disposition": f"attachment; filename={filename}{extension}"} - return FileResponse( - path=video_path, - headers=headers, - filename=f"{filename}{extension}", - media_type=f"video/{extension[1:]}", - ) +# @router.post( +# "/musics", +# response_model=BgmUploadResponse, +# summary="Upload the BGM file to the songs directory", +# ) +# def upload_bgm_file(request: Request, file: UploadFile = File(...)): +# request_id = base.get_task_id(request) +# # check file ext +# if file.filename.endswith("mp3"): +# song_dir = utils.song_dir() +# save_path = os.path.join(song_dir, file.filename) +# # save file +# with open(save_path, "wb+") as buffer: +# # If the file already exists, it will be overwritten +# file.file.seek(0) +# buffer.write(file.file.read()) +# response = {"file": save_path} +# return utils.get_response(200, response) +# +# raise HttpException( +# "", status_code=400, message=f"{request_id}: Only *.mp3 files can be uploaded" +# ) +# +# +# @router.get("/stream/{file_path:path}") +# async def stream_video(request: Request, file_path: str): +# tasks_dir = utils.task_dir() +# video_path = os.path.join(tasks_dir, file_path) +# range_header = request.headers.get("Range") +# video_size = os.path.getsize(video_path) +# start, end = 0, video_size - 1 +# +# length = video_size +# if range_header: +# range_ = range_header.split("bytes=")[1] +# start, end = [int(part) if part else None for part in range_.split("-")] +# if start is None: +# start = video_size - end +# end = video_size - 1 +# if end is None: +# end = video_size - 1 +# length = end - start + 1 +# +# def file_iterator(file_path, offset=0, bytes_to_read=None): +# with open(file_path, "rb") as f: +# f.seek(offset, os.SEEK_SET) +# remaining = bytes_to_read or video_size +# while remaining > 0: +# bytes_to_read = min(4096, remaining) +# data = f.read(bytes_to_read) +# if not data: +# break +# remaining -= len(data) +# yield data +# +# response = StreamingResponse( +# file_iterator(video_path, start, length), media_type="video/mp4" +# ) +# response.headers["Content-Range"] = f"bytes {start}-{end}/{video_size}" +# response.headers["Accept-Ranges"] = "bytes" +# response.headers["Content-Length"] = str(length) +# response.status_code = 206 # Partial Content +# +# return response +# +# +# @router.get("/download/{file_path:path}") +# async def download_video(_: Request, file_path: str): +# """ +# download video +# :param _: Request request +# :param file_path: video file path, eg: /cd1727ed-3473-42a2-a7da-4faafafec72b/final-1.mp4 +# :return: video file +# """ +# tasks_dir = utils.task_dir() +# video_path = os.path.join(tasks_dir, file_path) +# file_path = pathlib.Path(video_path) +# filename = file_path.stem +# extension = file_path.suffix +# headers = {"Content-Disposition": f"attachment; filename={filename}{extension}"} +# return FileResponse( +# path=video_path, +# headers=headers, +# filename=f"{filename}{extension}", +# media_type=f"video/{extension[1:]}", +# ) diff --git a/app/controllers/v2/base.py b/app/controllers/v2/base.py new file mode 100644 index 0000000..4612983 --- /dev/null +++ b/app/controllers/v2/base.py @@ -0,0 +1,11 @@ +from fastapi import APIRouter, Depends + + +def v2_router(dependencies=None): + router = APIRouter() + router.tags = ["V2"] + router.prefix = "/api/v2" + # 将认证依赖项应用于所有路由 + if dependencies: + router.dependencies = dependencies + return router diff --git a/app/controllers/v2/script.py b/app/controllers/v2/script.py new file mode 100644 index 0000000..85f4238 --- /dev/null +++ b/app/controllers/v2/script.py @@ -0,0 +1,45 @@ +from fastapi import APIRouter, BackgroundTasks +from loguru import logger + +from app.models.schema_v2 import GenerateScriptRequest, GenerateScriptResponse +from app.services.script_service import ScriptGenerator +from app.utils import utils +from app.controllers.v2.base import v2_router + +# router = APIRouter(prefix="/api/v2", tags=["Script Generation V2"]) +router = v2_router() + +@router.post( + "/scripts/generate", + response_model=GenerateScriptResponse, + summary="生成视频脚本 (V2)" +) +async def generate_script( + request: GenerateScriptRequest, + background_tasks: BackgroundTasks +): + """ + 生成视频脚本的V2版本API + """ + task_id = utils.get_uuid() + + try: + generator = ScriptGenerator() + script = await generator.generate_script( + video_path=request.video_path, + video_theme=request.video_theme, + custom_prompt=request.custom_prompt, + skip_seconds=request.skip_seconds, + threshold=request.threshold, + vision_batch_size=request.vision_batch_size, + vision_llm_provider=request.vision_llm_provider + ) + + return { + "task_id": task_id, + "script": script + } + + except Exception as e: + logger.exception(f"Generate script failed: {str(e)}") + raise \ No newline at end of file diff --git a/app/models/schema_v2.py b/app/models/schema_v2.py new file mode 100644 index 0000000..786c018 --- /dev/null +++ b/app/models/schema_v2.py @@ -0,0 +1,15 @@ +from typing import Optional, List +from pydantic import BaseModel + +class GenerateScriptRequest(BaseModel): + video_path: str + video_theme: Optional[str] = "" + custom_prompt: Optional[str] = "" + skip_seconds: Optional[int] = 0 + threshold: Optional[int] = 30 + vision_batch_size: Optional[int] = 5 + vision_llm_provider: Optional[str] = "gemini" + +class GenerateScriptResponse(BaseModel): + task_id: str + script: List[dict] \ No newline at end of file diff --git a/app/router.py b/app/router.py index cf84037..df60500 100644 --- a/app/router.py +++ b/app/router.py @@ -10,8 +10,12 @@ Resources: from fastapi import APIRouter from app.controllers.v1 import llm, video +from app.controllers.v2 import script root_api_router = APIRouter() # v1 root_api_router.include_router(video.router) root_api_router.include_router(llm.router) + +# v2 +root_api_router.include_router(script.router) diff --git a/app/services/script_service.py b/app/services/script_service.py new file mode 100644 index 0000000..1693cbc --- /dev/null +++ b/app/services/script_service.py @@ -0,0 +1,378 @@ +import os +import json +import time +import asyncio +import requests +from loguru import logger +from typing import List, Dict, Any, Callable + +from app.utils import utils, vision_analyzer, video_processor, video_processor_v2 +from app.utils.script_generator import ScriptProcessor +from app.config import config + + +class ScriptGenerator: + def __init__(self): + self.temp_dir = utils.temp_dir() + self.keyframes_dir = os.path.join(self.temp_dir, "keyframes") + + async def generate_script( + self, + video_path: str, + video_theme: str = "", + custom_prompt: str = "", + skip_seconds: int = 0, + threshold: int = 30, + vision_batch_size: int = 5, + vision_llm_provider: str = "gemini", + progress_callback: Callable[[float, str], None] = None + ) -> List[Dict[Any, Any]]: + """ + 生成视频脚本的核心逻辑 + + Args: + video_path: 视频文件路径 + video_theme: 视频主题 + custom_prompt: 自定义提示词 + skip_seconds: 跳过开始的秒数 + threshold: 差异阈值 + vision_batch_size: 视觉处理批次大小 + vision_llm_provider: 视觉模型提供商 + progress_callback: 进度回调函数 + + Returns: + List[Dict]: 生成的视频脚本 + """ + if progress_callback is None: + progress_callback = lambda p, m: None + + try: + # 提取关键帧 + progress_callback(10, "正在提取关键帧...") + keyframe_files = await self._extract_keyframes( + video_path, + skip_seconds, + threshold + ) + + if vision_llm_provider == "gemini": + script = await self._process_with_gemini( + keyframe_files, + video_theme, + custom_prompt, + vision_batch_size, + progress_callback + ) + elif vision_llm_provider == "narratoapi": + script = await self._process_with_narrato( + keyframe_files, + video_theme, + custom_prompt, + vision_batch_size, + progress_callback + ) + else: + raise ValueError(f"Unsupported vision provider: {vision_llm_provider}") + + return json.loads(script) if isinstance(script, str) else script + + except Exception as e: + logger.exception("Generate script failed") + raise + + async def _extract_keyframes( + self, + video_path: str, + skip_seconds: int, + threshold: int + ) -> List[str]: + """提取视频关键帧""" + video_hash = utils.md5(video_path + str(os.path.getmtime(video_path))) + video_keyframes_dir = os.path.join(self.keyframes_dir, video_hash) + + # 检查缓存 + keyframe_files = [] + if os.path.exists(video_keyframes_dir): + for filename in sorted(os.listdir(video_keyframes_dir)): + if filename.endswith('.jpg'): + keyframe_files.append(os.path.join(video_keyframes_dir, filename)) + + if keyframe_files: + logger.info(f"Using cached keyframes: {video_keyframes_dir}") + return keyframe_files + + # 提取新的关键帧 + os.makedirs(video_keyframes_dir, exist_ok=True) + + try: + if config.frames.get("version") == "v2": + processor = video_processor_v2.VideoProcessor(video_path) + processor.process_video_pipeline( + output_dir=video_keyframes_dir, + skip_seconds=skip_seconds, + threshold=threshold + ) + else: + processor = video_processor.VideoProcessor(video_path) + processor.process_video( + output_dir=video_keyframes_dir, + skip_seconds=skip_seconds + ) + + for filename in sorted(os.listdir(video_keyframes_dir)): + if filename.endswith('.jpg'): + keyframe_files.append(os.path.join(video_keyframes_dir, filename)) + + return keyframe_files + + except Exception as e: + if os.path.exists(video_keyframes_dir): + import shutil + shutil.rmtree(video_keyframes_dir) + raise + + async def _process_with_gemini( + self, + keyframe_files: List[str], + video_theme: str, + custom_prompt: str, + vision_batch_size: int, + progress_callback: Callable[[float, str], None] + ) -> str: + """使用Gemini处理视频帧""" + progress_callback(30, "正在初始化视觉分析器...") + + # 获取Gemini配置 + vision_api_key = config.app.get("vision_gemini_api_key") + vision_model = config.app.get("vision_gemini_model_name") + + if not vision_api_key or not vision_model: + raise ValueError("未配置 Gemini API Key 或者模型") + + analyzer = vision_analyzer.VisionAnalyzer( + model_name=vision_model, + api_key=vision_api_key, + ) + + progress_callback(40, "正在分析关键帧...") + + # 执行异步分析 + results = await analyzer.analyze_images( + images=keyframe_files, + prompt=config.app.get('vision_analysis_prompt'), + batch_size=vision_batch_size + ) + + progress_callback(60, "正在整理分析结果...") + + # 合并所有批次的分析结果 + frame_analysis = "" + prev_batch_files = None + + for result in results: + if 'error' in result: + logger.warning(f"批次 {result['batch_index']} 处理出现警告: {result['error']}") + continue + + batch_files = self._get_batch_files(keyframe_files, result, vision_batch_size) + first_timestamp, last_timestamp, _ = self._get_batch_timestamps(batch_files, prev_batch_files) + + # 添加带时间戳的分析结果 + frame_analysis += f"\n=== {first_timestamp}-{last_timestamp} ===\n" + frame_analysis += result['response'] + frame_analysis += "\n" + + prev_batch_files = batch_files + + if not frame_analysis.strip(): + raise Exception("未能生成有效的帧分析结果") + + progress_callback(70, "正在生成脚本...") + + # 构建帧内容列表 + frame_content_list = [] + prev_batch_files = None + + for result in results: + if 'error' in result: + continue + + batch_files = self._get_batch_files(keyframe_files, result, vision_batch_size) + _, _, timestamp_range = self._get_batch_timestamps(batch_files, prev_batch_files) + + frame_content = { + "timestamp": timestamp_range, + "picture": result['response'], + "narration": "", + "OST": 2 + } + frame_content_list.append(frame_content) + prev_batch_files = batch_files + + if not frame_content_list: + raise Exception("没有有效的帧内容可以处理") + + progress_callback(90, "正在生成文案...") + + # 获取文本生成配置 + text_provider = config.app.get('text_llm_provider', 'gemini').lower() + text_api_key = config.app.get(f'text_{text_provider}_api_key') + text_model = config.app.get(f'text_{text_provider}_model_name') + + processor = ScriptProcessor( + model_name=text_model, + api_key=text_api_key, + prompt=custom_prompt, + video_theme=video_theme + ) + + return processor.process_frames(frame_content_list) + + async def _process_with_narrato( + self, + keyframe_files: List[str], + video_theme: str, + custom_prompt: str, + vision_batch_size: int, + progress_callback: Callable[[float, str], None] + ) -> str: + """使用NarratoAPI处理视频帧""" + # 创建临时目录 + temp_dir = utils.temp_dir("narrato") + + # 打包关键帧 + progress_callback(30, "正在打包关键帧...") + zip_path = os.path.join(temp_dir, f"keyframes_{int(time.time())}.zip") + + try: + if not utils.create_zip(keyframe_files, zip_path): + raise Exception("打包关键帧失败") + + # 获取API配置 + api_url = config.app.get("narrato_api_url") + api_key = config.app.get("narrato_api_key") + + if not api_key: + raise ValueError("未配置 Narrato API Key") + + headers = { + 'X-API-Key': api_key, + 'accept': 'application/json' + } + + api_params = { + 'batch_size': vision_batch_size, + 'use_ai': False, + 'start_offset': 0, + 'vision_model': config.app.get('narrato_vision_model', 'gemini-1.5-flash'), + 'vision_api_key': config.app.get('narrato_vision_key'), + 'llm_model': config.app.get('narrato_llm_model', 'qwen-plus'), + 'llm_api_key': config.app.get('narrato_llm_key'), + 'custom_prompt': custom_prompt + } + + progress_callback(40, "正在上传文件...") + with open(zip_path, 'rb') as f: + files = {'file': (os.path.basename(zip_path), f, 'application/x-zip-compressed')} + response = requests.post( + f"{api_url}/video/analyze", + headers=headers, + params=api_params, + files=files, + timeout=30 + ) + response.raise_for_status() + + task_data = response.json() + task_id = task_data["data"].get('task_id') + if not task_id: + raise Exception(f"无效的API响应: {response.text}") + + progress_callback(50, "正在等待分析结果...") + retry_count = 0 + max_retries = 60 + + while retry_count < max_retries: + try: + status_response = requests.get( + f"{api_url}/video/tasks/{task_id}", + headers=headers, + timeout=10 + ) + status_response.raise_for_status() + task_status = status_response.json()['data'] + + if task_status['status'] == 'SUCCESS': + return task_status['result']['data'] + elif task_status['status'] in ['FAILURE', 'RETRY']: + raise Exception(f"任务失败: {task_status.get('error')}") + + retry_count += 1 + time.sleep(2) + + except requests.RequestException as e: + logger.warning(f"获取任务状态失败,重试中: {str(e)}") + retry_count += 1 + time.sleep(2) + continue + + raise Exception("任务执行超时") + + finally: + # 清理临时文件 + try: + if os.path.exists(zip_path): + os.remove(zip_path) + except Exception as e: + logger.warning(f"清理临时文件失败: {str(e)}") + + def _get_batch_files( + self, + keyframe_files: List[str], + result: Dict[str, Any], + batch_size: int + ) -> List[str]: + """获取当前批次的图片文件""" + batch_start = result['batch_index'] * batch_size + batch_end = min(batch_start + batch_size, len(keyframe_files)) + return keyframe_files[batch_start:batch_end] + + def _get_batch_timestamps( + self, + batch_files: List[str], + prev_batch_files: List[str] = None + ) -> tuple[str, str, str]: + """获取一批文件的时间戳范围""" + if not batch_files: + logger.warning("Empty batch files") + return "00:00", "00:00", "00:00-00:00" + + if len(batch_files) == 1 and prev_batch_files and len(prev_batch_files) > 0: + first_frame = os.path.basename(prev_batch_files[-1]) + last_frame = os.path.basename(batch_files[0]) + else: + first_frame = os.path.basename(batch_files[0]) + last_frame = os.path.basename(batch_files[-1]) + + first_time = first_frame.split('_')[2].replace('.jpg', '') + last_time = last_frame.split('_')[2].replace('.jpg', '') + + def format_timestamp(time_str: str) -> str: + if len(time_str) < 4: + logger.warning(f"Invalid timestamp format: {time_str}") + return "00:00" + + minutes = int(time_str[-4:-2]) + seconds = int(time_str[-2:]) + + if seconds >= 60: + minutes += seconds // 60 + seconds = seconds % 60 + + return f"{minutes:02d}:{seconds:02d}" + + first_timestamp = format_timestamp(first_time) + last_timestamp = format_timestamp(last_time) + timestamp_range = f"{first_timestamp}-{last_timestamp}" + + return first_timestamp, last_timestamp, timestamp_range \ No newline at end of file diff --git a/webui/i18n/zh.json b/webui/i18n/zh.json index 68b968a..db17ccc 100644 --- a/webui/i18n/zh.json +++ b/webui/i18n/zh.json @@ -103,7 +103,6 @@ "Video Quality": "视频质量", "Custom prompt for LLM, leave empty to use default prompt": "自定义提示词,留空则使用默认提示词", "Proxy Settings": "代理设置", - "Language": "界面语言", "HTTP_PROXY": "HTTP 代理", "HTTPs_PROXY": "HTTPS 代理", "Vision Model Settings": "视频分析模型设置", From 45fae0b982dea83092b8685aa2807fec93f263d0 Mon Sep 17 00:00:00 2001 From: linyq Date: Mon, 18 Nov 2024 17:38:30 +0800 Subject: [PATCH 2/9] =?UTF-8?q?feat(v2):=20=E6=96=B0=E5=A2=9E=E8=A7=86?= =?UTF-8?q?=E9=A2=91=E8=A3=81=E5=89=AA=E5=92=8CYouTube=E8=A7=86=E9=A2=91?= =?UTF-8?q?=E4=B8=8B=E8=BD=BD=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 在 schema_v2.py 中添加了新的请求和响应模型 - 在 script.py 中实现了 /scripts/crop 和 /youtube/download 接口- 新增 video_service.py 和 youtube_service.py 服务模块 - 更新 utils.py 中的工具函数以支持新功能 --- app/controllers/v2/script.py | 84 +++++++++++++++++++- app/models/schema_v2.py | 31 +++++++- app/services/video_service.py | 58 ++++++++++++++ app/services/youtube_service.py | 135 ++++++++++++++++++++++++++++++++ requirements.txt | 3 +- 5 files changed, 304 insertions(+), 7 deletions(-) create mode 100644 app/services/video_service.py create mode 100644 app/services/youtube_service.py diff --git a/app/controllers/v2/script.py b/app/controllers/v2/script.py index 85f4238..94a337b 100644 --- a/app/controllers/v2/script.py +++ b/app/controllers/v2/script.py @@ -1,18 +1,29 @@ from fastapi import APIRouter, BackgroundTasks from loguru import logger +import os -from app.models.schema_v2 import GenerateScriptRequest, GenerateScriptResponse +from app.models.schema_v2 import ( + GenerateScriptRequest, + GenerateScriptResponse, + CropVideoRequest, + CropVideoResponse, + DownloadVideoRequest, + DownloadVideoResponse +) from app.services.script_service import ScriptGenerator +from app.services.video_service import VideoService from app.utils import utils from app.controllers.v2.base import v2_router +from app.models.schema import VideoClipParams +from app.services.youtube_service import YoutubeService -# router = APIRouter(prefix="/api/v2", tags=["Script Generation V2"]) router = v2_router() + @router.post( "/scripts/generate", response_model=GenerateScriptResponse, - summary="生成视频脚本 (V2)" + summary="同步请求;生成视频脚本 (V2)" ) async def generate_script( request: GenerateScriptRequest, @@ -42,4 +53,69 @@ async def generate_script( except Exception as e: logger.exception(f"Generate script failed: {str(e)}") - raise \ No newline at end of file + raise + + +@router.post( + "/scripts/crop", + response_model=CropVideoResponse, + summary="同步请求;裁剪视频 (V2)" +) +async def crop_video( + request: CropVideoRequest, + background_tasks: BackgroundTasks +): + """ + 根据脚本裁剪视频的V2版本API + """ + try: + # 调用视频裁剪服务 + video_service = VideoService() + task_id, subclip_videos = await video_service.crop_video( + video_path=request.video_origin_path, + video_script=request.video_script + ) + logger.debug(f"裁剪视频成功,视频片段路径: {subclip_videos}") + logger.debug(type(subclip_videos)) + return { + "task_id": task_id, + "subclip_videos": subclip_videos + } + + except Exception as e: + logger.exception(f"Crop video failed: {str(e)}") + raise + + +@router.post( + "/youtube/download", + response_model=DownloadVideoResponse, + summary="同步请求;下载YouTube视频 (V2)" +) +async def download_youtube_video( + request: DownloadVideoRequest, + background_tasks: BackgroundTasks +): + """ + 下载指定分辨率的YouTube视频 + """ + try: + youtube_service = YoutubeService() + task_id, output_path, filename = await youtube_service.download_video( + url=request.url, + resolution=request.resolution, + output_format=request.output_format, + rename=request.rename + ) + + return { + "task_id": task_id, + "output_path": output_path, + "resolution": request.resolution, + "format": request.output_format, + "filename": filename + } + + except Exception as e: + logger.exception(f"Download YouTube video failed: {str(e)}") + raise diff --git a/app/models/schema_v2.py b/app/models/schema_v2.py index 786c018..9894d89 100644 --- a/app/models/schema_v2.py +++ b/app/models/schema_v2.py @@ -1,6 +1,7 @@ from typing import Optional, List from pydantic import BaseModel + class GenerateScriptRequest(BaseModel): video_path: str video_theme: Optional[str] = "" @@ -9,7 +10,33 @@ class GenerateScriptRequest(BaseModel): threshold: Optional[int] = 30 vision_batch_size: Optional[int] = 5 vision_llm_provider: Optional[str] = "gemini" - + + class GenerateScriptResponse(BaseModel): task_id: str - script: List[dict] \ No newline at end of file + script: List[dict] + + +class CropVideoRequest(BaseModel): + video_origin_path: str + video_script: List[dict] + + +class CropVideoResponse(BaseModel): + task_id: str + subclip_videos: dict + + +class DownloadVideoRequest(BaseModel): + url: str + resolution: str + output_format: Optional[str] = "mp4" + rename: Optional[str] = None + + +class DownloadVideoResponse(BaseModel): + task_id: str + output_path: str + resolution: str + format: str + filename: str diff --git a/app/services/video_service.py b/app/services/video_service.py new file mode 100644 index 0000000..2a0a9a6 --- /dev/null +++ b/app/services/video_service.py @@ -0,0 +1,58 @@ +import os +from uuid import uuid4 +from loguru import logger +from typing import Dict, List, Optional, Tuple + +from app.services import material +from app.models.schema import VideoClipParams +from app.utils import utils + + +class VideoService: + @staticmethod + async def crop_video( + video_path: str, + video_script: List[dict] + ) -> Tuple[str, Dict[str, str]]: + """ + 裁剪视频服务 + + Args: + video_path: 视频文件路径 + video_script: 视频脚本列表 + + Returns: + Tuple[str, Dict[str, str]]: (task_id, 裁剪后的视频片段字典) + 视频片段字典格式: {timestamp: video_path} + """ + try: + task_id = str(uuid4()) + + # 从脚本中提取时间戳列表 + time_list = [scene['timestamp'] for scene in video_script] + + # 调用裁剪服务 + subclip_videos = material.clip_videos( + task_id=task_id, + timestamp_terms=time_list, + origin_video=video_path + ) + + if subclip_videos is None: + raise ValueError("裁剪视频失败") + + # 更新脚本中的视频路径 + for scene in video_script: + try: + scene['path'] = subclip_videos[scene['timestamp']] + except KeyError as err: + logger.error(f"更新视频路径失败: {err}") + + logger.debug(f"裁剪视频成功,共生成 {len(time_list)} 个视频片段") + logger.debug(f"视频片段路径: {subclip_videos}") + + return task_id, subclip_videos + + except Exception as e: + logger.exception("裁剪视频失败") + raise \ No newline at end of file diff --git a/app/services/youtube_service.py b/app/services/youtube_service.py new file mode 100644 index 0000000..d478198 --- /dev/null +++ b/app/services/youtube_service.py @@ -0,0 +1,135 @@ +import yt_dlp +import os +from typing import List, Dict, Optional, Tuple +from loguru import logger +from uuid import uuid4 + +from app.utils import utils + + +class YoutubeService: + def __init__(self): + self.supported_formats = ['mp4', 'mkv', 'webm', 'flv', 'avi'] + + def _get_video_formats(self, url: str) -> List[Dict]: + """获取视频可用的格式列表""" + ydl_opts = { + 'quiet': True, + 'no_warnings': True + } + + try: + with yt_dlp.YoutubeDL(ydl_opts) as ydl: + info = ydl.extract_info(url, download=False) + formats = info.get('formats', []) + + format_list = [] + for f in formats: + format_info = { + 'format_id': f.get('format_id', 'N/A'), + 'ext': f.get('ext', 'N/A'), + 'resolution': f.get('format_note', 'N/A'), + 'filesize': f.get('filesize', 'N/A'), + 'vcodec': f.get('vcodec', 'N/A'), + 'acodec': f.get('acodec', 'N/A') + } + format_list.append(format_info) + + return format_list + except Exception as e: + logger.error(f"获取视频格式失败: {str(e)}") + raise + + def _validate_format(self, output_format: str) -> None: + """验证输出格式是否支持""" + if output_format.lower() not in self.supported_formats: + raise ValueError( + f"不支持的视频格式: {output_format}。" + f"支持的格式: {', '.join(self.supported_formats)}" + ) + + async def download_video( + self, + url: str, + resolution: str, + output_format: str = 'mp4', + rename: Optional[str] = None + ) -> Tuple[str, str, str]: + """ + 下载指定分辨率的视频 + + Args: + url: YouTube视频URL + resolution: 目标分辨率 ('2160p', '1440p', '1080p', '720p' etc.) + output_format: 输出视频格式 + rename: 可选的重命名 + + Returns: + Tuple[str, str, str]: (task_id, output_path, filename) + """ + try: + task_id = str(uuid4()) + self._validate_format(output_format) + + # 获取所有可用格式 + formats = self._get_video_formats(url) + + # 查找指定分辨率的最佳视频格式 + target_format = None + for fmt in formats: + if fmt['resolution'] == resolution and fmt['vcodec'] != 'none': + target_format = fmt + break + + if target_format is None: + available_resolutions = set( + fmt['resolution'] for fmt in formats + if fmt['resolution'] != 'N/A' and fmt['vcodec'] != 'none' + ) + raise ValueError( + f"未找到 {resolution} 分辨率的视频。" + f"可用分辨率: {', '.join(sorted(available_resolutions))}" + ) + + # 创建输出目录 + output_dir = utils.video_dir() + os.makedirs(output_dir, exist_ok=True) + + # 设置下载选项 + if rename: + # 如果指定了重命名,直接使用新名字 + filename = f"{rename}.{output_format}" + output_template = os.path.join(output_dir, filename) + else: + # 否则使用任务ID和原标题 + output_template = os.path.join(output_dir, f'{task_id}_%(title)s.%(ext)s') + + ydl_opts = { + 'format': f"{target_format['format_id']}+bestaudio[ext=m4a]/best", + 'outtmpl': output_template, + 'merge_output_format': output_format.lower(), + 'postprocessors': [{ + 'key': 'FFmpegVideoConvertor', + 'preferedformat': output_format.lower(), + }] + } + + # 执行下载 + with yt_dlp.YoutubeDL(ydl_opts) as ydl: + info = ydl.extract_info(url, download=True) + if rename: + # 如果指定了重命名,使用新文件名 + output_path = output_template + filename = os.path.basename(output_path) + else: + # 否则使用原始标题 + video_title = info.get('title', task_id) + filename = f"{task_id}_{video_title}.{output_format}" + output_path = os.path.join(output_dir, filename) + + logger.info(f"视频下载成功: {output_path}") + return task_id, output_path, filename + + except Exception as e: + logger.exception("下载视频失败") + raise diff --git a/requirements.txt b/requirements.txt index 2ae1f29..3024e71 100644 --- a/requirements.txt +++ b/requirements.txt @@ -31,4 +31,5 @@ python-dotenv~=1.0.1 openai~=1.53.0 tqdm>=4.66.6 tenacity>=9.0.0 -tiktoken==0.8.0 \ No newline at end of file +tiktoken==0.8.0 +yt-dlp==2024.11.18 From 58773d605c782604f150705fb5d74703031891f0 Mon Sep 17 00:00:00 2001 From: linyq Date: Mon, 18 Nov 2024 18:01:31 +0800 Subject: [PATCH 3/9] =?UTF-8?q?feat(v2):=20=E6=B7=BB=E5=8A=A0=E5=BC=80?= =?UTF-8?q?=E5=A7=8B=E8=A7=86=E9=A2=91=E5=89=AA=E8=BE=91=E4=BB=BB=E5=8A=A1?= =?UTF-8?q?=E7=9A=84=20API=20=E6=8E=A5=E5=8F=A3-=20=E6=96=B0=E5=A2=9E=20St?= =?UTF-8?q?artSubclipRequest=20=E5=92=8C=20StartSubclipResponse=20?= =?UTF-8?q?=E6=A8=A1=E5=9E=8B-=20=E5=AE=9E=E7=8E=B0=20/scripts/start-subcl?= =?UTF-8?q?ip=20=E6=8E=A5=E5=8F=A3=EF=BC=8C=E7=94=A8=E4=BA=8E=E5=90=AF?= =?UTF-8?q?=E5=8A=A8=E8=A7=86=E9=A2=91=E5=89=AA=E8=BE=91=E4=BB=BB=E5=8A=A1?= =?UTF-8?q?=20-=20=E6=94=AF=E6=8C=81=E5=BC=82=E6=AD=A5=E5=A4=84=E7=90=86?= =?UTF-8?q?=EF=BC=8C=E8=BF=94=E5=9B=9E=E4=BB=BB=E5=8A=A1=20ID=20=E5=92=8C?= =?UTF-8?q?=E5=88=9D=E5=A7=8B=E7=8A=B6=E6=80=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/controllers/v2/script.py | 49 +++++++++++++++++++++++++++++++++++- app/models/schema_v2.py | 20 +++++++++++++++ 2 files changed, 68 insertions(+), 1 deletion(-) diff --git a/app/controllers/v2/script.py b/app/controllers/v2/script.py index 94a337b..c50ee0e 100644 --- a/app/controllers/v2/script.py +++ b/app/controllers/v2/script.py @@ -8,14 +8,18 @@ from app.models.schema_v2 import ( CropVideoRequest, CropVideoResponse, DownloadVideoRequest, - DownloadVideoResponse + DownloadVideoResponse, + StartSubclipRequest, + StartSubclipResponse ) +from app.models.schema import VideoClipParams from app.services.script_service import ScriptGenerator from app.services.video_service import VideoService from app.utils import utils from app.controllers.v2.base import v2_router from app.models.schema import VideoClipParams from app.services.youtube_service import YoutubeService +from app.services import task as task_service router = v2_router() @@ -119,3 +123,46 @@ async def download_youtube_video( except Exception as e: logger.exception(f"Download YouTube video failed: {str(e)}") raise + + +@router.post( + "/scripts/start-subclip", + response_model=StartSubclipResponse, + summary="异步请求;开始视频剪辑任务 (V2)" +) +async def start_subclip( + request: VideoClipParams, + background_tasks: BackgroundTasks +): + """ + 开始视频剪辑任务的V2版本API + """ + try: + # 构建参数对象 + params = VideoClipParams( + video_origin_path=request.video_origin_path, + video_clip_json_path=request.video_clip_json_path, + voice_name=request.voice_name, + voice_rate=request.voice_rate, + voice_pitch=request.voice_pitch, + subtitle_enabled=request.subtitle_enabled, + video_aspect=request.video_aspect, + n_threads=request.n_threads + ) + + # 在后台任务中执行视频剪辑 + background_tasks.add_task( + task_service.start_subclip, + task_id=request.task_id, + params=params, + subclip_path_videos=request.subclip_videos + ) + + return { + "task_id": request.task_id, + "state": "PROCESSING" # 初始状态 + } + + except Exception as e: + logger.exception(f"Start subclip task failed: {str(e)}") + raise diff --git a/app/models/schema_v2.py b/app/models/schema_v2.py index 9894d89..1611a3b 100644 --- a/app/models/schema_v2.py +++ b/app/models/schema_v2.py @@ -40,3 +40,23 @@ class DownloadVideoResponse(BaseModel): resolution: str format: str filename: str + + +class StartSubclipRequest(BaseModel): + task_id: str + video_origin_path: str + video_clip_json_path: str + voice_name: Optional[str] = None + voice_rate: Optional[int] = 0 + voice_pitch: Optional[int] = 0 + subtitle_enabled: Optional[bool] = True + video_aspect: Optional[str] = "16:9" + n_threads: Optional[int] = 4 + subclip_videos: list # 从裁剪视频接口获取的视频片段字典 + + +class StartSubclipResponse(BaseModel): + task_id: str + state: str + videos: Optional[List[str]] = None + combined_videos: Optional[List[str]] = None From b34d9fe14c18ebfb599afbace3663e862fa3ec11 Mon Sep 17 00:00:00 2001 From: linyqh Date: Tue, 19 Nov 2024 01:23:20 +0800 Subject: [PATCH 4/9] =?UTF-8?q?refactor(webui):=20=E4=BC=98=E5=8C=96?= =?UTF-8?q?=E9=9F=B3=E9=A2=91=E8=AE=BE=E7=BD=AE=E7=95=8C=E9=9D=A2=E5=B9=B6?= =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E4=BB=A3=E7=90=86=E9=85=8D=E7=BD=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 修改支持的语音列表,仅保留中文语音 - 在主程序中添加代理配置环境变量 -优化剪辑视频函数,改为返回字典类型 - 更新任务服务中的剪辑视频函数,适应新的参数类型 - 修改测试用例中的视频剪辑函数,增加输出路径参数 - 更新脚本控制器中的剪辑视频函数,集成任务 ID 和子视频字典 --- app/controllers/v2/script.py | 8 +- app/pipeline/video_pipeline.py | 162 +++++++++++++++ app/services/material.py | 2 +- app/services/task.py | 2 +- app/services/voice.py | 7 +- app/services/youtube_service.py | 21 +- app/test/test_moviepy.py | 8 +- main.py | 3 + webui.txt | 323 +++++++++++++++++++++++++++++ webui/components/audio_settings.py | 2 +- 10 files changed, 522 insertions(+), 16 deletions(-) create mode 100644 app/pipeline/video_pipeline.py diff --git a/app/controllers/v2/script.py b/app/controllers/v2/script.py index c50ee0e..c3501eb 100644 --- a/app/controllers/v2/script.py +++ b/app/controllers/v2/script.py @@ -132,6 +132,8 @@ async def download_youtube_video( ) async def start_subclip( request: VideoClipParams, + task_id: str, + subclip_videos: dict, background_tasks: BackgroundTasks ): """ @@ -153,13 +155,13 @@ async def start_subclip( # 在后台任务中执行视频剪辑 background_tasks.add_task( task_service.start_subclip, - task_id=request.task_id, + task_id=task_id, params=params, - subclip_path_videos=request.subclip_videos + subclip_path_videos=subclip_videos ) return { - "task_id": request.task_id, + "task_id": task_id, "state": "PROCESSING" # 初始状态 } diff --git a/app/pipeline/video_pipeline.py b/app/pipeline/video_pipeline.py new file mode 100644 index 0000000..1c54bad --- /dev/null +++ b/app/pipeline/video_pipeline.py @@ -0,0 +1,162 @@ +import requests +import json +import time +from typing import Dict, Any + +class VideoPipeline: + def __init__(self, base_url: str = "http://127.0.0.1:8080"): + self.base_url = base_url + + def download_video(self, url: str, resolution: str = "1080p", + output_format: str = "mp4", rename: str = None) -> Dict[str, Any]: + """下载视频的第一步""" + endpoint = f"{self.base_url}/api/v2/youtube/download" + payload = { + "url": url, + "resolution": resolution, + "output_format": output_format, + "rename": rename or time.strftime("%Y-%m-%d") + } + + response = requests.post(endpoint, json=payload) + response.raise_for_status() + return response.json() + + def generate_script(self, video_path: str, skip_seconds: int = 0, + threshold: int = 30, vision_batch_size: int = 10, + vision_llm_provider: str = "gemini") -> Dict[str, Any]: + """生成脚本的第二步""" + endpoint = f"{self.base_url}/api/v2/scripts/generate" + payload = { + "video_path": video_path, + "skip_seconds": skip_seconds, + "threshold": threshold, + "vision_batch_size": vision_batch_size, + "vision_llm_provider": vision_llm_provider + } + + response = requests.post(endpoint, json=payload) + response.raise_for_status() + return response.json() + + def crop_video(self, video_path: str, script: list) -> Dict[str, Any]: + """剪辑视频的第三步""" + endpoint = f"{self.base_url}/api/v2/scripts/crop" + payload = { + "video_origin_path": video_path, + "video_script": script + } + + response = requests.post(endpoint, json=payload) + response.raise_for_status() + return response.json() + + def generate_final_video(self, task_id: str, video_path: str, + script_path: str, script: list, subclip_videos: Dict[str, str]) -> Dict[str, Any]: + """生成最终视频的第四步""" + endpoint = f"{self.base_url}/api/v2/scripts/start-subclip" + + request_data = { + "video_clip_json": script, + "video_clip_json_path": script_path, + "video_origin_path": video_path, + "video_aspect": "16:9", + "video_language": "zh-CN", + "voice_name": "zh-CN-YunjianNeural", + "voice_volume": 1, + "voice_rate": 1.2, + "voice_pitch": 1, + "bgm_name": "random", + "bgm_type": "random", + "bgm_file": "", + "bgm_volume": 0.3, + "subtitle_enabled": True, + "subtitle_position": "bottom", + "font_name": "STHeitiMedium.ttc", + "text_fore_color": "#FFFFFF", + "text_background_color": "transparent", + "font_size": 75, + "stroke_color": "#000000", + "stroke_width": 1.5, + "custom_position": 70, + "n_threads": 8 + } + + payload = { + "request": request_data, + "subclip_videos": subclip_videos + } + + params = {"task_id": task_id} + response = requests.post(endpoint, params=params, json=payload) + response.raise_for_status() + return response.json() + + def save_script_to_json(self, script: list) -> str: + """保存脚本到json文件""" + timestamp = time.strftime("%Y-%m%d-%H%M%S") + script_path = f"E:\\projects\\NarratoAI\\resource\\scripts\\{timestamp}.json" + + try: + with open(script_path, 'w', encoding='utf-8') as f: + json.dump(script, f, ensure_ascii=False, indent=2) + print(f"脚本已保存到: {script_path}") + return script_path + except Exception as e: + print(f"保存脚本失败: {str(e)}") + raise + + def run_pipeline(self, youtube_url: str) -> Dict[str, Any]: + """运行完整的pipeline""" + try: + # 1. 下载视频 + print("开始下载视频...") + download_result = self.download_video(youtube_url) + video_path = download_result["output_path"] + + # 2. 生成脚本 + print("开始生成脚本...") + script_result = self.generate_script(video_path) + script = script_result["script"] + + # 2.1 保存脚本到json文件 + print("保存脚本到json文件...") + script_path = self.save_script_to_json(script) + script_result["script_path"] = script_path + + # 3. 剪辑视频 + print("开始剪辑视频...") + crop_result = self.crop_video(video_path, script) + subclip_videos = crop_result["subclip_videos"] + + # 4. 生成最终视频 + print("开始生成最终视频...") + final_result = self.generate_final_video( + crop_result["task_id"], + video_path, + script_path, + script, + subclip_videos + ) + + return { + "status": "success", + "download_result": download_result, + "script_result": script_result, + "crop_result": crop_result, + "final_result": final_result + } + + except Exception as e: + return { + "status": "error", + "error": str(e) + } + +# 使用示例 +if __name__ == "__main__": + pipeline = VideoPipeline() + result = pipeline.run_pipeline("https://www.youtube.com/watch?v=Kenm35gdqtk") + print(json.dumps(result, indent=2, ensure_ascii=False)) + result2 = pipeline.run_pipeline("https://www.youtube.com/watch?v=aEsHAcedzgw") + print(json.dumps(result2, indent=2, ensure_ascii=False)) diff --git a/app/services/material.py b/app/services/material.py index bab1aba..696eda8 100644 --- a/app/services/material.py +++ b/app/services/material.py @@ -363,7 +363,7 @@ def save_clip_video(timestamp: str, origin_video: str, save_dir: str = "") -> di return {} -def clip_videos(task_id: str, timestamp_terms: List[str], origin_video: str, progress_callback=None): +def clip_videos(task_id: str, timestamp_terms: List[str], origin_video: str, progress_callback=None) -> dict: """ 剪辑视频 Args: diff --git a/app/services/task.py b/app/services/task.py index c903047..c030574 100644 --- a/app/services/task.py +++ b/app/services/task.py @@ -324,7 +324,7 @@ def start(task_id, params: VideoParams, stop_at: str = "video"): return kwargs -def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: list): +def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: dict): """ 后台任务(自动剪辑视频进行剪辑) diff --git a/app/services/voice.py b/app/services/voice.py index 02245f6..21082c1 100644 --- a/app/services/voice.py +++ b/app/services/voice.py @@ -989,6 +989,9 @@ Gender: Female Name: zh-CN-XiaoxiaoMultilingualNeural-V2 Gender: Female + +Name: zh-CN-YunxiNeural-V2 +Gender: Male """.strip() voices = [] name = "" @@ -1034,8 +1037,8 @@ def is_azure_v2_voice(voice_name: str): def tts( text: str, voice_name: str, voice_rate: float, voice_pitch: float, voice_file: str ) -> [SubMaker, None]: - # if is_azure_v2_voice(voice_name): - # return azure_tts_v2(text, voice_name, voice_file) + if is_azure_v2_voice(voice_name): + return azure_tts_v2(text, voice_name, voice_file) return azure_tts_v1(text, voice_name, voice_rate, voice_pitch, voice_file) diff --git a/app/services/youtube_service.py b/app/services/youtube_service.py index d478198..e4a7a79 100644 --- a/app/services/youtube_service.py +++ b/app/services/youtube_service.py @@ -5,6 +5,7 @@ from loguru import logger from uuid import uuid4 from app.utils import utils +from app.services import video as VideoService class YoutubeService: @@ -61,6 +62,7 @@ class YoutubeService: Args: url: YouTube视频URL resolution: 目标分辨率 ('2160p', '1440p', '1080p', '720p' etc.) + 注意:对于类似'1080p60'的输入会被处理为'1080p' output_format: 输出视频格式 rename: 可选的重命名 @@ -71,23 +73,32 @@ class YoutubeService: task_id = str(uuid4()) self._validate_format(output_format) + # 标准化分辨率格式 + base_resolution = resolution.split('p')[0] + 'p' + # 获取所有可用格式 formats = self._get_video_formats(url) # 查找指定分辨率的最佳视频格式 target_format = None for fmt in formats: - if fmt['resolution'] == resolution and fmt['vcodec'] != 'none': - target_format = fmt - break + fmt_resolution = fmt['resolution'] + # 将格式的分辨率也标准化后进行比较 + if fmt_resolution != 'N/A': + fmt_base_resolution = fmt_resolution.split('p')[0] + 'p' + if fmt_base_resolution == base_resolution and fmt['vcodec'] != 'none': + target_format = fmt + break if target_format is None: + # 收集可用分辨率时也进行标准化 available_resolutions = set( - fmt['resolution'] for fmt in formats + fmt['resolution'].split('p')[0] + 'p' + for fmt in formats if fmt['resolution'] != 'N/A' and fmt['vcodec'] != 'none' ) raise ValueError( - f"未找到 {resolution} 分辨率的视频。" + f"未找到 {base_resolution} 分辨率的视频。" f"可用分辨率: {', '.join(sorted(available_resolutions))}" ) diff --git a/app/test/test_moviepy.py b/app/test/test_moviepy.py index d37d518..208b708 100644 --- a/app/test/test_moviepy.py +++ b/app/test/test_moviepy.py @@ -31,7 +31,7 @@ def format_duration(seconds: float) -> str: return f"{minutes:02d}:{remaining_seconds:02d}" -def cut_video(video_path: str, start_time: str, end_time: str) -> None: +def cut_video(video_path: str, start_time: str, end_time: str, output_path: str) -> None: """ 剪辑视频 参数: @@ -53,11 +53,13 @@ def cut_video(video_path: str, start_time: str, end_time: str) -> None: # 剪辑视频 video = video.subclip(start_seconds, end_seconds) - video.write_videofile("../../resource/videos/cut_video2.mp4") + video.write_videofile("../../resource/videos/cut_video3.mp4") # 释放资源 video.close() if __name__ == "__main__": - cut_video("../../resource/videos/best.mp4", "00:40", "02:40") + # cut_video("E:\\NarratoAI_v0.3.5_cuda\\NarratoAI\storage\\tasks\ca4fee22-350b-47f9-bb2f-802ad96774f7\\final-2.mp4", "00:00", "07:00", "E:\\NarratoAI_v0.3.5_cuda\\NarratoAI\storage\\tasks\\yyjx2-1") + # cut_video("E:\\NarratoAI_v0.3.5_cuda\\NarratoAI\storage\\tasks\ca4fee22-350b-47f9-bb2f-802ad96774f7\\final-2.mp4", "07:00", "14:00", "E:\\NarratoAI_v0.3.5_cuda\\NarratoAI\storage\\tasks\\yyjx2-2") + cut_video("E:\\NarratoAI_v0.3.5_cuda\\NarratoAI\storage\\tasks\ca4fee22-350b-47f9-bb2f-802ad96774f7\\final-2.mp4", "14:00", "22:00", "E:\\NarratoAI_v0.3.5_cuda\\NarratoAI\storage\\tasks\\yyjx2-3") diff --git a/main.py b/main.py index e84f32b..bfec175 100644 --- a/main.py +++ b/main.py @@ -1,3 +1,4 @@ +import os import uvicorn from loguru import logger @@ -7,6 +8,8 @@ if __name__ == "__main__": logger.info( "start server, docs: http://127.0.0.1:" + str(config.listen_port) + "/docs" ) + os.environ["HTTP_PROXY"] = config.proxy.get("http") + os.environ["HTTPS_PROXY"] = config.proxy.get("https") uvicorn.run( app="app.asgi:app", host=config.listen_host, diff --git a/webui.txt b/webui.txt index e835524..b64b320 100644 --- a/webui.txt +++ b/webui.txt @@ -47,3 +47,326 @@ pause rem set HF_ENDPOINT=https://hf-mirror.com streamlit run webui.py --browser.serverAddress="127.0.0.1" --server.enableCORS=True --server.maxUploadSize=2048 --browser.gatherUsageStats=False + +请求0: +curl -X 'POST' \ + 'http://127.0.0.1:8080/api/v2/youtube/download' \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "url": "https://www.youtube.com/watch?v=Kenm35gdqtk", + "resolution": "1080p", + "output_format": "mp4", + "rename": "2024-11-19" +}' +{ + "url": "https://www.youtube.com/watch?v=Kenm35gdqtk", + "resolution": "1080p", + "output_format": "mp4", + "rename": "2024-11-19" +} + +请求1: +curl -X 'POST' \ + 'http://127.0.0.1:8080/api/v2/scripts/generate' \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "video_path": "E:\\projects\\NarratoAI\\resource\\videos\\test.mp4", + "skip_seconds": 0, + "threshold": 30, + "vision_batch_size": 10, + "vision_llm_provider": "gemini" +}' +{ + "video_path": "E:\\projects\\NarratoAI\\resource\\videos\\test.mp4", + "skip_seconds": 0, + "threshold": 30, + "vision_batch_size": 10, + "vision_llm_provider": "gemini" +} + +请求2: +curl -X 'POST' \ + 'http://127.0.0.1:8080/api/v2/scripts/crop' \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "video_origin_path": "E:\\projects\\NarratoAI\\resource\\videos\\test.mp4", + "video_script": [ + { + "timestamp": "00:10-01:01", + "picture": "好的,以下是视频画面的客观描述:\n\n视频展现一名留着胡须的男子在森林里挖掘。\n\n画面首先展现男子从后方视角,背着军绿色背包,穿着卡其色长裤和深色T恤,走向一个泥土斜坡。背包上似乎有一个镐头。\n\n下一个镜头特写展现了该背包,一个镐头从背包里伸出来,包里还有一些其他工具。\n\n然后,视频显示该男子用镐头挖掘泥土斜坡。\n\n接下来是一些近景镜头,展现男子的靴子在泥土中行走,以及男子用手清理泥土。\n\n其他镜头从不同角度展现该男子在挖掘,包括从侧面和上方。\n\n可以看到他用工具挖掘,清理泥土,并检查挖出的土壤。\n\n最后,一个镜头展现了挖出的土壤的质地和颜色。", + "narration": "好的,接下来就是我们这位“胡须大侠”的精彩冒险了!只见他背着军绿色的背包,迈着比我上班还不情愿的步伐走向那泥土斜坡。哎呀,这个背包可真是个宝贝,里面藏着一把镐头和一些工具,简直像是个随身携带的“建筑工具箱”! \n\n看他挥舞着镐头,挖掘泥土的姿势,仿佛在进行一场“挖土大赛”,结果却比我做饭还要糟糕。泥土飞扬中,他的靴子也成了“泥巴艺术家”。最后,那堆色泽各异的土壤就像他心情的写照——五彩斑斓又略显混乱!真是一次让人捧腹的建造之旅!", + "OST": 2, + "new_timestamp": "00:00-00:51" + }, + { + "timestamp": "01:07-01:53", + "picture": "好的,以下是视频画面的客观描述:\n\n视频以一系列森林环境的镜头开头。\n\n第一个镜头是一个特写镜头,镜头中显示的是一些带有水滴的绿色叶子。\n\n第二个镜头显示一个留着胡须的男子在森林中挖掘一个洞。 他跪在地上,用工具挖土。\n\n第三个镜头是一个中等镜头,显示同一个人坐在他挖好的洞边休息。\n\n第四个镜头显示该洞的内部结构,该洞在树根和地面之间。\n\n第五个镜头显示该男子用斧头砍树枝。\n\n第六个镜头显示一堆树枝横跨一个泥泞的小水坑。\n\n第七个镜头显示更多茂盛的树叶和树枝在阳光下。\n\n第八个镜头显示更多茂盛的树叶和树枝。\n\n\n", + "narration": "接下来,我们的“挖土大师”又开始了他的森林探险。看这镜头,水滴在叶子上闪烁,仿佛在说:“快来,快来,这里有故事!”他一边挖洞,一边像个新手厨师试图切洋葱——每一下都小心翼翼,生怕自己不小心挖出个“历史遗址”。坐下休息的时候,脸上的表情就像发现新大陆一样!然后,他拿起斧头砍树枝,简直是现代版的“神雕侠侣”,只不过对象是树木。最后,那堆树枝架过泥泞的小水坑,仿佛在说:“我就是不怕湿脚的勇士!”这就是我们的建造之旅!", + "OST": 2, + "new_timestamp": "00:51-01:37" + } + ] +}' +{ + "video_origin_path": "E:\\projects\\NarratoAI\\resource\\videos\\test.mp4", + "video_script": [ + { + "timestamp": "00:10-01:01", + "picture": "好的,以下是视频画面的客观描述:\n\n视频展现一名留着胡须的男子在森林里挖掘。\n\n画面首先展现男子从后方视角,背着军绿色背包,穿着卡其色长裤和深色T恤,走向一个泥土斜坡。背包上似乎有一个镐头。\n\n下一个镜头特写展现了该背包,一个镐头从背包里伸出来,包里还有一些其他工具。\n\n然后,视频显示该男子用镐头挖掘泥土斜坡。\n\n接下来是一些近景镜头,展现男子的靴子在泥土中行走,以及男子用手清理泥土。\n\n其他镜头从不同角度展现该男子在挖掘,包括从侧面和上方。\n\n可以看到他用工具挖掘,清理泥土,并检查挖出的土壤。\n\n最后,一个镜头展现了挖出的土壤的质地和颜色。", + "narration": "好的,接下来就是我们这位“胡须大侠”的精彩冒险了!只见他背着军绿色的背包,迈着比我上班还不情愿的步伐走向那泥土斜坡。哎呀,这个背包可真是个宝贝,里面藏着一把镐头和一些工具,简直像是个随身携带的“建筑工具箱”! \n\n看他挥舞着镐头,挖掘泥土的姿势,仿佛在进行一场“挖土大赛”,结果却比我做饭还要糟糕。泥土飞扬中,他的靴子也成了“泥巴艺术家”。最后,那堆色泽各异的土壤就像他心情的写照——五彩斑斓又略显混乱!真是一次让人捧腹的建造之旅!", + "OST": 2, + "new_timestamp": "00:00-00:51" + }, + { + "timestamp": "01:07-01:53", + "picture": "好的,以下是视频画面的客观描述:\n\n视频以一系列森林环境的镜头开头。\n\n第一个镜头是一个特写镜头,镜头中显示的是一些带有水滴的绿色叶子。\n\n第二个镜头显示一个留着胡须的男子在森林中挖掘一个洞。 他跪在地上,用工具挖土。\n\n第三个镜头是一个中等镜头,显示同一个人坐在他挖好的洞边休息。\n\n第四个镜头显示该洞的内部结构,该洞在树根和地面之间。\n\n第五个镜头显示该男子用斧头砍树枝。\n\n第六个镜头显示一堆树枝横跨一个泥泞的小水坑。\n\n第七个镜头显示更多茂盛的树叶和树枝在阳光下。\n\n第八个镜头显示更多茂盛的树叶和树枝。\n\n\n", + "narration": "接下来,我们的“挖土大师”又开始了他的森林探险。看这镜头,水滴在叶子上闪烁,仿佛在说:“快来,快来,这里有故事!”他一边挖洞,一边像个新手厨师试图切洋葱——每一下都小心翼翼,生怕自己不小心挖出个“历史遗址”。坐下休息的时候,脸上的表情就像发现新大陆一样!然后,他拿起斧头砍树枝,简直是现代版的“神雕侠侣”,只不过对象是树木。最后,那堆树枝架过泥泞的小水坑,仿佛在说:“我就是不怕湿脚的勇士!”这就是我们的建造之旅!", + "OST": 2, + "new_timestamp": "00:51-01:37" + } + ] +} + +请求3: +curl -X 'POST' \ + 'http://127.0.0.1:8080/api/v2/scripts/start-subclip?task_id=12121' \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "request": { + "video_clip_json": [ + { + "timestamp": "00:10-01:01", + "picture": "好的,以下是视频画面的客观描述:\n\n视频展现一名留着胡须的男子在森林里挖掘。\n\n画面首先展现男子从后方视角,背着军绿色背包,穿着卡其色长裤和深色T恤,走向一个泥土斜坡。背包上似乎有一个镐头。\n\n下一个镜头特写展现了该背包,一个镐头从背包里伸出来,包里还有一些其他工具。\n\n然后,视频显示该男子用镐头挖掘泥土斜坡。\n\n接下来是一些近景镜头,展现男子的靴子在泥土中行走,以及男子用手清理泥土。\n\n其他镜头从不同角度展现该男子在挖掘,包括从侧面和上方。\n\n可以看到他用工具挖掘,清理泥土,并检查挖出的土壤。\n\n最后,一个镜头展现了挖出的土壤的质地和颜色。", + "narration": "好的,接下来就是我们这位“胡须大侠”的精彩冒险了!只见他背着军绿色的背包,迈着比我上班还不情愿的步伐走向那泥土斜坡。哎呀,这个背包可真是个宝贝,里面藏着一把镐头和一些工具,简直像是个随身携带的“建筑工具箱”! \n\n看他挥舞着镐头,挖掘泥土的姿势,仿佛在进行一场“挖土大赛”,结果却比我做饭还要糟糕。泥土飞扬中,他的靴子也成了“泥巴艺术家”。最后,那堆色泽各异的土壤就像他心情的写照——五彩斑斓又略显混乱!真是一次让人捧腹的建造之旅!", + "OST": 2, + "new_timestamp": "00:00-00:51" + }, + { + "timestamp": "01:07-01:53", + "picture": "好的,以下是视频画面的客观描述:\n\n视频以一系列森林环境的镜头开头。\n\n第一个镜头是一个特写镜头,镜头中显示的是一些带有水滴的绿色叶子。\n\n第二个镜头显示一个留着胡须的男子在森林中挖掘一个洞。 他跪在地上,用工具挖土。\n\n第三个镜头是一个中等镜头,显示同一个人坐在他挖好的洞边休息。\n\n第四个镜头显示该洞的内部结构,该洞在树根和地面之间。\n\n第五个镜头显示该男子用斧头砍树枝。\n\n第六个镜头显示一堆树枝横跨一个泥泞的小水坑。\n\n第七个镜头显示更多茂盛的树叶和树枝在阳光下。\n\n第八个镜头显示更多茂盛的树叶和树枝。\n\n\n", + "narration": "接下来,我们的“挖土大师”又开始了他的森林探险。看这镜头,水滴在叶子上闪烁,仿佛在说:“快来,快来,这里有故事!”他一边挖洞,一边像个新手厨师试图切洋葱——每一下都小心翼翼,生怕自己不小心挖出个“历史遗址”。坐下休息的时候,脸上的表情就像发现新大陆一样!然后,他拿起斧头砍树枝,简直是现代版的“神雕侠侣”,只不过对象是树木。最后,那堆树枝架过泥泞的小水坑,仿佛在说:“我就是不怕湿脚的勇士!”这就是我们的建造之旅!", + "OST": 2, + "new_timestamp": "00:51-01:37" + } + ], + "video_clip_json_path": "E:\\projects\\NarratoAI\\resource\\scripts\\2024-1118-230421.json", + "video_origin_path": "E:\\projects\\NarratoAI\\resource\\videos\\test.mp4", + "video_aspect": "16:9", + "video_language": "zh-CN", + "voice_name": "zh-CN-YunjianNeural", + "voice_volume": 1, + "voice_rate": 1.2, + "voice_pitch": 1, + "bgm_name": "random", + "bgm_type": "random", + "bgm_file": "", + "bgm_volume": 0.3, + "subtitle_enabled": true, + "subtitle_position": "bottom", + "font_name": "STHeitiMedium.ttc", + "text_fore_color": "#FFFFFF", + "text_background_color": "transparent", + "font_size": 75, + "stroke_color": "#000000", + "stroke_width": 1.5, + "custom_position": 70, + "n_threads": 8 + }, + "subclip_videos": { + "00:10-01:01": "E:\\projects\\NarratoAI\\storage\\cache_videos/vid-00_10-01_01.mp4", + "01:07-01:53": "E:\\projects\\NarratoAI\\storage\\cache_videos/vid-01_07-01_53.mp4" + } +}' +{ + "request": { + "video_clip_json": [ + { + "timestamp": "00:10-01:01", + "picture": "好的,以下是视频画面的客观描述:\n\n视频展现一名留着胡须的男子在森林里挖掘。\n\n画面首先展现男子从后方视角,背着军绿色背包,穿着卡其色长裤和深色T恤,走向一个泥土斜坡。背包上似乎有一个镐头。\n\n下一个镜头特写展现了该背包,一个镐头从背包里伸出来,包里还有一些其他工具。\n\n然后,视频显示该男子用镐头挖掘泥土斜坡。\n\n接下来是一些近景镜头,展现男子的靴子在泥土中行走,以及男子用手清理泥土。\n\n其他镜头从不同角度展现该男子在挖掘,包括从侧面和上方。\n\n可以看到他用工具挖掘,清理泥土,并检查挖出的土壤。\n\n最后,一个镜头展现了挖出的土壤的质地和颜色。", + "narration": "好的,接下来就是我们这位“胡须大侠”的精彩冒险了!只见他背着军绿色的背包,迈着比我上班还不情愿的步伐走向那泥土斜坡。哎呀,这个背包可真是个宝贝,里面藏着一把镐头和一些工具,简直像是个随身携带的“建筑工具箱”! \n\n看他挥舞着镐头,挖掘泥土的姿势,仿佛在进行一场“挖土大赛”,结果却比我做饭还要糟糕。泥土飞扬中,他的靴子也成了“泥巴艺术家”。最后,那堆色泽各异的土壤就像他心情的写照——五彩斑斓又略显混乱!真是一次让人捧腹的建造之旅!", + "OST": 2, + "new_timestamp": "00:00-00:51" + }, + { + "timestamp": "01:07-01:53", + "picture": "好的,以下是视频画面的客观描述:\n\n视频以一系列森林环境的镜头开头。\n\n第一个镜头是一个特写镜头,镜头中显示的是一些带有水滴的绿色叶子。\n\n第二个镜头显示一个留着胡须的男子在森林中挖掘一个洞。 他跪在地上,用工具挖土。\n\n第三个镜头是一个中等镜头,显示同一个人坐在他挖好的洞边休息。\n\n第四个镜头显示该洞的内部结构,该洞在树根和地面之间。\n\n第五个镜头显示该男子用斧头砍树枝。\n\n第六个镜头显示一堆树枝横跨一个泥泞的小水坑。\n\n第七个镜头显示更多茂盛的树叶和树枝在阳光下。\n\n第八个镜头显示更多茂盛的树叶和树枝。\n\n\n", + "narration": "接下来,我们的“挖土大师”又开始了他的森林探险。看这镜头,水滴在叶子上闪烁,仿佛在说:“快来,快来,这里有故事!”他一边挖洞,一边像个新手厨师试图切洋葱——每一下都小心翼翼,生怕自己不小心挖出个“历史遗址”。坐下休息的时候,脸上的表情就像发现新大陆一样!然后,他拿起斧头砍树枝,简直是现代版的“神雕侠侣”,只不过对象是树木。最后,那堆树枝架过泥泞的小水坑,仿佛在说:“我就是不怕湿脚的勇士!”这就是我们的建造之旅!", + "OST": 2, + "new_timestamp": "00:51-01:37" + } + ], + "video_clip_json_path": "E:\\projects\\NarratoAI\\resource\\scripts\\2024-1118-230421.json", + "video_origin_path": "E:\\projects\\NarratoAI\\resource\\videos\\test.mp4", + "video_aspect": "16:9", + "video_language": "zh-CN", + "voice_name": "zh-CN-YunjianNeural", + "voice_volume": 1, + "voice_rate": 1.2, + "voice_pitch": 1, + "bgm_name": "random", + "bgm_type": "random", + "bgm_file": "", + "bgm_volume": 0.3, + "subtitle_enabled": true, + "subtitle_position": "bottom", + "font_name": "STHeitiMedium.ttc", + "text_fore_color": "#FFFFFF", + "text_background_color": "transparent", + "font_size": 75, + "stroke_color": "#000000", + "stroke_width": 1.5, + "custom_position": 70, + "n_threads": 8 + }, + "subclip_videos": { + "00:10-01:01": "E:\\projects\\NarratoAI\\storage\\cache_videos/vid-00_10-01_01.mp4", + "01:07-01:53": "E:\\projects\\NarratoAI\\storage\\cache_videos/vid-01_07-01_53.mp4" + } +} + + +请在最外层新建一个pipeline 工作流执行逻辑的代码; +他会按照下面的顺序请求接口 +1.下载视频 +curl -X 'POST' \ + 'http://127.0.0.1:8080/api/v2/youtube/download' \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "url": "https://www.youtube.com/watch?v=Kenm35gdqtk", + "resolution": "1080p", + "output_format": "mp4", + "rename": "2024-11-19" +}' +2.生成脚本 +curl -X 'POST' \ + 'http://127.0.0.1:8080/api/v2/scripts/generate' \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "video_path": "E:\\projects\\NarratoAI\\resource\\videos\\test.mp4", + "skip_seconds": 0, + "threshold": 30, + "vision_batch_size": 10, + "vision_llm_provider": "gemini" +}' +3. 剪辑视频 +curl -X 'POST' \ + 'http://127.0.0.1:8080/api/v2/scripts/crop' \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "video_origin_path": "E:\\projects\\NarratoAI\\resource\\videos\\test.mp4", + "video_script": [ + { + "timestamp": "00:10-01:01", + "picture": "好的,以下是视频画面的客观描述:\n\n视频展现一名留着胡须的男子在森林里挖掘。\n\n画面首先展现男子从后方视角,背着军绿色背包,穿着卡其色长裤和深色T恤,走向一个泥土斜坡。背包上似乎有一个镐头。\n\n下一个镜头特写展现了该背包,一个镐头从背包里伸出来,包里还有一些其他工具。\n\n然后,视频显示该男子用镐头挖掘泥土斜坡。\n\n接下来是一些近景镜头,展现男子的靴子在泥土中行走,以及男子用手清理泥土。\n\n其他镜头从不同角度展现该男子在挖掘,包括从侧面和上方。\n\n可以看到他用工具挖掘,清理泥土,并检查挖出的土壤。\n\n最后,一个镜头展现了挖出的土壤的质地和颜色。", + "narration": "好的,接下来就是我们这位“胡须大侠”的精彩冒险了!只见他背着军绿色的背包,迈着比我上班还不情愿的步伐走向那泥土斜坡。哎呀,这个背包可真是个宝贝,里面藏着一把镐头和一些工具,简直像是个随身携带的“建筑工具箱”! \n\n看他挥舞着镐头,挖掘泥土的姿势,仿佛在进行一场“挖土大赛”,结果却比我做饭还要糟糕。泥土飞扬中,他的靴子也成了“泥巴艺术家”。最后,那堆色泽各异的土壤就像他心情的写照——五彩斑斓又略显混乱!真是一次让人捧腹的建造之旅!", + "OST": 2, + "new_timestamp": "00:00-00:51" + }, + { + "timestamp": "01:07-01:53", + "picture": "好的,以下是视频画面的客观描述:\n\n视频以一系列森林环境的镜头开头。\n\n第一个镜头是一个特写镜头,镜头中显示的是一些带有水滴的绿色叶子。\n\n第二个镜头显示一个留着胡须的男子在森林中挖掘一个洞。 他跪在地上,用工具挖土。\n\n第三个镜头是一个中等镜头,显示同一个人坐在他挖好的洞边休息。\n\n第四个镜头显示该洞的内部结构,该洞在树根和地面之间。\n\n第五个镜头显示该男子用斧头砍树枝。\n\n第六个镜头显示一堆树枝横跨一个泥泞的小水坑。\n\n第七个镜头显示更多茂盛的树叶和树枝在阳光下。\n\n第八个镜头显示更多茂盛的树叶和树枝。\n\n\n", + "narration": "接下来,我们的“挖土大师”又开始了他的森林探险。看这镜头,水滴在叶子上闪烁,仿佛在说:“快来,快来,这里有故事!”他一边挖洞,一边像个新手厨师试图切洋葱——每一下都小心翼翼,生怕自己不小心挖出个“历史遗址”。坐下休息的时候,脸上的表情就像发现新大陆一样!然后,他拿起斧头砍树枝,简直是现代版的“神雕侠侣”,只不过对象是树木。最后,那堆树枝架过泥泞的小水坑,仿佛在说:“我就是不怕湿脚的勇士!”这就是我们的建造之旅!", + "OST": 2, + "new_timestamp": "00:51-01:37" + } + ] +}' +4.生成视频 +curl -X 'POST' \ + 'http://127.0.0.1:8080/api/v2/scripts/start-subclip?task_id=12121' \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "request": { + "video_clip_json": [ + { + "timestamp": "00:10-01:01", + "picture": "好的,以下是视频画面的客观描述:\n\n视频展现一名留着胡须的男子在森林里挖掘。\n\n画面首先展现男子从后方视角,背着军绿色背包,穿着卡其色长裤和深色T恤,走向一个泥土斜坡。背包上似乎有一个镐头。\n\n下一个镜头特写展现了该背包,一个镐头从背包里伸出来,包里还有一些其他工具。\n\n然后,视频显示该男子用镐头挖掘泥土斜坡。\n\n接下来是一些近景镜头,展现男子的靴子在泥土中行走,以及男子用手清理泥土。\n\n其他镜头从不同角度展现该男子在挖掘,包括从侧面和上方。\n\n可以看到他用工具挖掘,清理泥土,并检查挖出的土壤。\n\n最后,一个镜头展现了挖出的土壤的质地和颜色。", + "narration": "好的,接下来就是我们这位“胡须大侠”的精彩冒险了!只见他背着军绿色的背包,迈着比我上班还不情愿的步伐走向那泥土斜坡。哎呀,这个背包可真是个宝贝,里面藏着一把镐头和一些工具,简直像是个随身携带的“建筑工具箱”! \n\n看他挥舞着镐头,挖掘泥土的姿势,仿佛在进行一场“挖土大赛”,结果却比我做饭还要糟糕。泥土飞扬中,他的靴子也成了“泥巴艺术家”。最后,那堆色泽各异的土壤就像他心情的写照——五彩斑斓又略显混乱!真是一次让人捧腹的建造之旅!", + "OST": 2, + "new_timestamp": "00:00-00:51" + }, + { + "timestamp": "01:07-01:53", + "picture": "好的,以下是视频画面的客观描述:\n\n视频以一系列森林环境的镜头开头。\n\n第一个镜头是一个特写镜头,镜头中显示的是一些带有水滴的绿色叶子。\n\n第二个镜头显示一个留着胡须的男子在森林中挖掘一个洞。 他跪在地上,用工具挖土。\n\n第三个镜头是一个中等镜头,显示同一个人坐在他挖好的洞边休息。\n\n第四个镜头显示该洞的内部结构,该洞在树根和地面之间。\n\n第五个镜头显示该男子用斧头砍树枝。\n\n第六个镜头显示一堆树枝横跨一个泥泞的小水坑。\n\n第七个镜头显示更多茂盛的树叶和树枝在阳光下。\n\n第八个镜头显示更多茂盛的树叶和树枝。\n\n\n", + "narration": "接下来,我们的“挖土大师”又开始了他的森林探险。看这镜头,水滴在叶子上闪烁,仿佛在说:“快来,快来,这里有故事!”他一边挖洞,一边像个新手厨师试图切洋葱——每一下都小心翼翼,生怕自己不小心挖出个“历史遗址”。坐下休息的时候,脸上的表情就像发现新大陆一样!然后,他拿起斧头砍树枝,简直是现代版的“神雕侠侣”,只不过对象是树木。最后,那堆树枝架过泥泞的小水坑,仿佛在说:“我就是不怕湿脚的勇士!”这就是我们的建造之旅!", + "OST": 2, + "new_timestamp": "00:51-01:37" + } + ], + "video_clip_json_path": "E:\\projects\\NarratoAI\\resource\\scripts\\2024-1118-230421.json", + "video_origin_path": "E:\\projects\\NarratoAI\\resource\\videos\\test.mp4", + "video_aspect": "16:9", + "video_language": "zh-CN", + "voice_name": "zh-CN-YunjianNeural", + "voice_volume": 1, + "voice_rate": 1.2, + "voice_pitch": 1, + "bgm_name": "random", + "bgm_type": "random", + "bgm_file": "", + "bgm_volume": 0.3, + "subtitle_enabled": true, + "subtitle_position": "bottom", + "font_name": "STHeitiMedium.ttc", + "text_fore_color": "#FFFFFF", + "text_background_color": "transparent", + "font_size": 75, + "stroke_color": "#000000", + "stroke_width": 1.5, + "custom_position": 70, + "n_threads": 8 + }, + "subclip_videos": { + "00:10-01:01": "E:\\projects\\NarratoAI\\storage\\cache_videos/vid-00_10-01_01.mp4", + "01:07-01:53": "E:\\projects\\NarratoAI\\storage\\cache_videos/vid-01_07-01_53.mp4" + } +}' + +请求1,返回的参数是: +{ + "task_id": "4e9b575f-68c0-4ae1-b218-db42b67993d0", + "output_path": "E:\\projects\\NarratoAI\\resource\\videos\\2024-11-19.mp4", + "resolution": "1080p", + "format": "mp4", + "filename": "2024-11-19.mp4" +} +output_path需要传递给请求2 +请求2,返回数据为: +{ + "task_id": "04497017-953c-44b4-bf1d-9d8ed3ebbbce", + "script": [ + { + "timestamp": "00:10-01:01", + "picture": "好的,以下是對影片畫面的客觀描述:\n\n影片顯示一名留著鬍鬚的男子在一處樹林茂密的斜坡上挖掘。\n\n畫面一:男子從後方出現,背著一個軍綠色的背包,背包裡似乎裝有工具。他穿著卡其色的長褲和深色的登山鞋。\n\n畫面二:特寫鏡頭顯示男子的背包,一個舊的鎬頭從包裡露出來,包裡還有其他工具,包括一個鏟子。\n\n畫面三:男子用鎬頭在斜坡上挖土,背包放在他旁邊。\n\n畫面四:特寫鏡頭顯示男子的登山鞋在泥土中。\n\n畫面五:男子坐在斜坡上,用手清理樹根和泥土。\n\n畫面六:地上有一些鬆動的泥土和落葉。\n\n畫面七:男子的背包近景鏡頭,他正在挖掘。\n\n畫面八:男子在斜坡上挖掘,揚起一陣塵土。\n\n畫面九:特寫鏡頭顯示男子用手清理泥土。\n\n畫面十:特寫鏡頭顯示挖出的泥土剖面,可以看到土壤的層次。", + "narration": "上一个画面是我在绝美的自然中,准备开启我的“土豪”挖掘之旅。现在,你们看到这位留着胡子的“大哥”,他背着个军绿色的包,里面装的可不仅仅是工具,还有我对生活的无限热爱(以及一丝不安)。看!这把旧镐头就像我的前任——用起来费劲,但又舍不得扔掉。\n\n他在斜坡上挖土,泥土飞扬,仿佛在跟大地进行一场“泥巴大战”。每一铲下去,都能听到大地微微的呻吟:哎呀,我这颗小树根可比我当年的情感纠葛还难处理呢!别担心,这些泥土层次分明,简直可以开个“泥土博物馆”。所以,朋友们,跟着我一起享受这场泥泞中的乐趣吧!", + "OST": 2, + "new_timestamp": "00:00-00:51" + }, + { + "timestamp": "01:07-01:53", + "picture": "好的,以下是對影片畫面內容的客觀描述:\n\n影片以一系列森林環境的鏡頭開始。第一個鏡頭展示了綠葉植物的特寫鏡頭,葉子上有一些水珠。接下來的鏡頭是一個男人在森林裡挖掘一個小坑,他跪在地上,用鏟子挖土。\n\n接下來的鏡頭是同一個男人坐在他挖的坑旁邊,望著前方。然後,鏡頭顯示該坑的廣角鏡頭,顯示其結構和大小。\n\n之後的鏡頭,同一個男人在樹林裡劈柴。鏡頭最後呈現出一潭渾濁的水,周圍環繞著樹枝。然後鏡頭又回到了森林裡生長茂盛的植物特寫鏡頭。", + "narration": "好嘞,朋友们,我们已经在泥土博物馆里捣鼓了一阵子,现在是时候跟大自然亲密接触了!看看这片森林,绿叶上水珠闪闪发光,就像我曾经的爱情,虽然短暂,却美得让人心碎。\n\n现在,我在这里挖个小坑,感觉自己就像是一位新晋“挖土大王”,不过说实话,这手艺真不敢恭维,连铲子都快对我崩溃了。再说劈柴,这动作简直比我前任的情绪波动还要激烈!最后这一潭浑浊的水,别担心,它只是告诉我:生活就像这水,总有些杂质,但也别忘了,要勇敢面对哦!", + "OST": 2, + "new_timestamp": "00:51-01:37" + } + ] +} +output_path和script参数需要传递给请求3 +请求3返回参数是 +{ + "task_id": "b6f5a98a-b2e0-4e3d-89c5-64fb90db2ec1", + "subclip_videos": { + "00:10-01:01": "E:\\projects\\NarratoAI\\storage\\cache_videos/vid-00_10-01_01.mp4", + "01:07-01:53": "E:\\projects\\NarratoAI\\storage\\cache_videos/vid-01_07-01_53.mp4" + } +} +subclip_videos和 output_path和script参数需要传递给请求4 +最后完成工作流 \ No newline at end of file diff --git a/webui/components/audio_settings.py b/webui/components/audio_settings.py index a189f65..f81effe 100644 --- a/webui/components/audio_settings.py +++ b/webui/components/audio_settings.py @@ -20,7 +20,7 @@ def render_audio_panel(tr): def render_tts_settings(tr): """渲染TTS(文本转语音)设置""" # 获取支持的语音列表 - support_locales = ["zh-CN", "zh-HK", "zh-TW", "en-US"] + support_locales = ["zh-CN"] voices = voice.get_all_azure_voices(filter_locals=support_locales) # 创建友好的显示名称 From 38f23983ef0c80f270fd5c6bef118acdeb76cd34 Mon Sep 17 00:00:00 2001 From: linyq Date: Tue, 19 Nov 2024 14:50:30 +0800 Subject: [PATCH 5/9] =?UTF-8?q?refactor(video=5Fpipeline):=20=E9=87=8D?= =?UTF-8?q?=E6=9E=84=E8=A7=86=E9=A2=91=E5=A4=84=E7=90=86=E7=AE=A1=E9=81=93?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 新增参数以支持更灵活的配置 -优化脚本保存逻辑,支持自定义脚本名称 -增加视频下载和脚本生成的条件判断,提高效率 - 异步生成最终视频,返回任务路径 - 更新示例使用新的管道配置 --- .../video_pipeline.py => video_pipeline.py | 81 ++++++++++++------- 1 file changed, 50 insertions(+), 31 deletions(-) rename app/pipeline/video_pipeline.py => video_pipeline.py (62%) diff --git a/app/pipeline/video_pipeline.py b/video_pipeline.py similarity index 62% rename from app/pipeline/video_pipeline.py rename to video_pipeline.py index 1c54bad..3e35544 100644 --- a/app/pipeline/video_pipeline.py +++ b/video_pipeline.py @@ -1,5 +1,6 @@ import requests import json +import os import time from typing import Dict, Any @@ -52,7 +53,7 @@ class VideoPipeline: return response.json() def generate_final_video(self, task_id: str, video_path: str, - script_path: str, script: list, subclip_videos: Dict[str, str]) -> Dict[str, Any]: + script_path: str, script: list, subclip_videos: Dict[str, str], voice_name: str) -> Dict[str, Any]: """生成最终视频的第四步""" endpoint = f"{self.base_url}/api/v2/scripts/start-subclip" @@ -62,7 +63,7 @@ class VideoPipeline: "video_origin_path": video_path, "video_aspect": "16:9", "video_language": "zh-CN", - "voice_name": "zh-CN-YunjianNeural", + "voice_name": voice_name, "voice_volume": 1, "voice_rate": 1.2, "voice_pitch": 1, @@ -92,10 +93,9 @@ class VideoPipeline: response.raise_for_status() return response.json() - def save_script_to_json(self, script: list) -> str: + def save_script_to_json(self, script: list, script_name: str) -> str: """保存脚本到json文件""" - timestamp = time.strftime("%Y-%m%d-%H%M%S") - script_path = f"E:\\projects\\NarratoAI\\resource\\scripts\\{timestamp}.json" + script_path = f"E:\\projects\\NarratoAI\\resource\\scripts\\{script_name}.json" try: with open(script_path, 'w', encoding='utf-8') as f: @@ -106,45 +106,55 @@ class VideoPipeline: print(f"保存脚本失败: {str(e)}") raise - def run_pipeline(self, youtube_url: str) -> Dict[str, Any]: + def run_pipeline(self, task_id: str, script_name: str, youtube_url: str, video_name: str="null", skip_seconds: int = 0, threshold: int = 30, vision_batch_size: int = 10, vision_llm_provider: str = "gemini", voice_name: str = "zh-CN-YunjianNeural") -> Dict[str, Any]: """运行完整的pipeline""" try: - # 1. 下载视频 - print("开始下载视频...") - download_result = self.download_video(youtube_url) - video_path = download_result["output_path"] + current_path = os.path.dirname(os.path.abspath(__file__)) + video_path = os.path.join(current_path, "resource", "videos", video_name) + # 判断视频是否存在 + if not os.path.exists(video_path): + # 1. 下载视频 + print(f"视频不存在, 开始下载视频: {video_path}") + download_result = self.download_video(youtube_url=youtube_url, resolution="1080p", output_format="mp4", rename=video_name) + video_path = download_result["output_path"] + else: + print(f"视频已存在: {video_path}") - # 2. 生成脚本 - print("开始生成脚本...") - script_result = self.generate_script(video_path) - script = script_result["script"] + # 2. 判断script_name是否存在 + # 2.1.1 拼接脚本路径 NarratoAI/resource/scripts + script_path = os.path.join(current_path, "resource", "scripts", script_name) + if os.path.exists(script_path): + script = json.load(open(script_path, "r", encoding="utf-8")) + else: + # 2.1.2 生成脚本 + print("开始生成脚本...") + script_result = self.generate_script(video_path=video_path, skip_seconds=skip_seconds, threshold=threshold, vision_batch_size=vision_batch_size, vision_llm_provider=vision_llm_provider) + script = script_result["script"] - # 2.1 保存脚本到json文件 + # 2.2 保存脚本到json文件 print("保存脚本到json文件...") - script_path = self.save_script_to_json(script) + script_path = self.save_script_to_json(script, script_name) script_result["script_path"] = script_path # 3. 剪辑视频 print("开始剪辑视频...") - crop_result = self.crop_video(video_path, script) + crop_result = self.crop_video(video_path=video_path, script=script) subclip_videos = crop_result["subclip_videos"] # 4. 生成最终视频 print("开始生成最终视频...") final_result = self.generate_final_video( - crop_result["task_id"], - video_path, - script_path, - script, - subclip_videos + task_id=task_id, + video_path=video_path, + script_path=script_path, + script=script, + subclip_videos=subclip_videos, + voice_name=voice_name ) return { - "status": "success", - "download_result": download_result, - "script_result": script_result, - "crop_result": crop_result, - "final_result": final_result + "status": "等待异步生成视频", + "path": os.path.join(current_path, "storage", "tasks", task_id) } except Exception as e: @@ -153,10 +163,19 @@ class VideoPipeline: "error": str(e) } + # 使用示例 if __name__ == "__main__": pipeline = VideoPipeline() - result = pipeline.run_pipeline("https://www.youtube.com/watch?v=Kenm35gdqtk") - print(json.dumps(result, indent=2, ensure_ascii=False)) - result2 = pipeline.run_pipeline("https://www.youtube.com/watch?v=aEsHAcedzgw") - print(json.dumps(result2, indent=2, ensure_ascii=False)) + result = pipeline.run_pipeline( + task_id="test_123", + script_name="test.json", + youtube_url="https://www.youtube.com/watch?v=Kenm35gdqtk", + video_name="test.mp4", + skip_seconds=0, + threshold=30, + vision_batch_size=10, + vision_llm_provider="gemini", + voice_name="zh-CN-YunjianNeural", + ) + print(result) From 1be304a696c530da50d5b7da0fa33b867af13421 Mon Sep 17 00:00:00 2001 From: linyqh Date: Wed, 20 Nov 2024 00:34:11 +0800 Subject: [PATCH 6/9] =?UTF-8?q?feat(subtitle):=20=E6=B7=BB=E5=8A=A0?= =?UTF-8?q?=E4=BB=8E=E8=A7=86=E9=A2=91=E6=8F=90=E5=8F=96=E9=9F=B3=E9=A2=91?= =?UTF-8?q?=E5=B9=B6=E7=94=9F=E6=88=90=E5=AD=97=E5=B9=95=E7=9A=84=E5=8A=9F?= =?UTF-8?q?=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 新增 extract_audio_and_create_subtitle函数,用于从视频文件中提取音频并生成字幕文件 - 更新 video_pipeline.py,修改视频下载和处理的相关参数 --- app/services/subtitle.py | 95 ++++++++++++++++++++++++++++++++-------- video_pipeline.py | 14 +++--- 2 files changed, 84 insertions(+), 25 deletions(-) diff --git a/app/services/subtitle.py b/app/services/subtitle.py index f37eb65..7b18e8d 100644 --- a/app/services/subtitle.py +++ b/app/services/subtitle.py @@ -8,6 +8,8 @@ from faster_whisper import WhisperModel from timeit import default_timer as timer from loguru import logger import google.generativeai as genai +from moviepy.editor import VideoFileClip +import os from app.config import config from app.utils import utils @@ -362,29 +364,86 @@ def create_with_gemini(audio_file: str, subtitle_file: str = "", api_key: Option return None +def extract_audio_and_create_subtitle(video_file: str, subtitle_file: str = "") -> Optional[str]: + """ + 从视频文件中提取音频并生成字幕文件。 + + 参数: + - video_file: MP4视频文件的路径 + - subtitle_file: 输出字幕文件的路径(可选)。如果未提供,将根据视频文件名自动生成。 + + 返回: + - str: 生成的字幕文件路径 + - None: 如果处理过程中出现错误 + """ + try: + # 获取视频文件所在目录 + video_dir = os.path.dirname(video_file) + video_name = os.path.splitext(os.path.basename(video_file))[0] + + # 设置音频文件路径 + audio_file = os.path.join(video_dir, f"{video_name}_audio.wav") + + # 如果未指定字幕文件路径,则自动生成 + if not subtitle_file: + subtitle_file = os.path.join(video_dir, f"{video_name}.srt") + + logger.info(f"开始从视频提取音频: {video_file}") + + # 加载视频文件 + video = VideoFileClip(video_file) + + # 提取音频并保存为WAV格式 + logger.info(f"正在提取音频到: {audio_file}") + video.audio.write_audiofile(audio_file, codec='pcm_s16le') + + # 关闭视频文件 + video.close() + + logger.info("音频提取完成,开始生成字幕") + + # 使用create函数生成字幕 + create(audio_file, subtitle_file) + + # 删除临时音频文件 + if os.path.exists(audio_file): + os.remove(audio_file) + logger.info("已清理临时音频文件") + + return subtitle_file + + except Exception as e: + logger.error(f"处理视频文件时出错: {str(e)}") + logger.error(traceback.format_exc()) + return None + + if __name__ == "__main__": - task_id = "test456" + task_id = "12121" task_dir = utils.task_dir(task_id) subtitle_file = f"{task_dir}/subtitle.srt" audio_file = f"{task_dir}/audio.wav" + video_file = f"{task_dir}/duanju_demo.mp4" - subtitles = file_to_subtitles(subtitle_file) - print(subtitles) + extract_audio_and_create_subtitle(video_file, subtitle_file) - # script_file = f"{task_dir}/script.json" - # with open(script_file, "r") as f: - # script_content = f.read() - # s = json.loads(script_content) - # script = s.get("script") - # - # correct(subtitle_file, script) + # subtitles = file_to_subtitles(subtitle_file) + # print(subtitles) - subtitle_file = f"{task_dir}/subtitle111.srt" - create(audio_file, subtitle_file) + # # script_file = f"{task_dir}/script.json" + # # with open(script_file, "r") as f: + # # script_content = f.read() + # # s = json.loads(script_content) + # # script = s.get("script") + # # + # # correct(subtitle_file, script) - # # 使用Gemini模型处理音频 - # gemini_api_key = config.app.get("gemini_api_key") # 请替换为实际的API密钥 - # gemini_subtitle_file = create_with_gemini(audio_file, api_key=gemini_api_key) - # - # if gemini_subtitle_file: - # print(f"Gemini生成的字幕文件: {gemini_subtitle_file}") + # subtitle_file = f"{task_dir}/subtitle111.srt" + # create(audio_file, subtitle_file) + + # # # 使用Gemini模型处理音频 + # # gemini_api_key = config.app.get("gemini_api_key") # 请替换为实际的API密钥 + # # gemini_subtitle_file = create_with_gemini(audio_file, api_key=gemini_api_key) + # # + # # if gemini_subtitle_file: + # # print(f"Gemini生成的字幕文件: {gemini_subtitle_file}") diff --git a/video_pipeline.py b/video_pipeline.py index 3e35544..5dca576 100644 --- a/video_pipeline.py +++ b/video_pipeline.py @@ -110,12 +110,12 @@ class VideoPipeline: """运行完整的pipeline""" try: current_path = os.path.dirname(os.path.abspath(__file__)) - video_path = os.path.join(current_path, "resource", "videos", video_name) + video_path = os.path.join(current_path, "resource", "videos", f"{video_name}.mp4") # 判断视频是否存在 if not os.path.exists(video_path): # 1. 下载视频 print(f"视频不存在, 开始下载视频: {video_path}") - download_result = self.download_video(youtube_url=youtube_url, resolution="1080p", output_format="mp4", rename=video_name) + download_result = self.download_video(url=youtube_url, resolution="1080p", output_format="mp4", rename=video_name) video_path = download_result["output_path"] else: print(f"视频已存在: {video_path}") @@ -168,12 +168,12 @@ class VideoPipeline: if __name__ == "__main__": pipeline = VideoPipeline() result = pipeline.run_pipeline( - task_id="test_123", + task_id="test_111901", script_name="test.json", - youtube_url="https://www.youtube.com/watch?v=Kenm35gdqtk", - video_name="test.mp4", - skip_seconds=0, - threshold=30, + youtube_url="https://www.youtube.com/watch?v=vLJ7Yed6FQ4", + video_name="2024-11-19-01", + skip_seconds=50, + threshold=35, vision_batch_size=10, vision_llm_provider="gemini", voice_name="zh-CN-YunjianNeural", From f3248ef03a57432fab0ec9e4f7162810cb712a4b Mon Sep 17 00:00:00 2001 From: linyqh Date: Wed, 20 Nov 2024 02:45:52 +0800 Subject: [PATCH 7/9] =?UTF-8?q?feat(test):=20=E6=B7=BB=E5=8A=A0=E4=B8=8E?= =?UTF-8?q?=E9=80=9A=E4=B9=89=E5=8D=83=E9=97=AEAI=E6=A8=A1=E5=9E=8B?= =?UTF-8?q?=E5=AF=B9=E8=AF=9D=E7=9A=84=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 新增 chat_with_qwen 函数,用于与通义千问AI模型进行对话 - 添加错误处理和资源管理,提高代码健壮性 - 优化视频剪辑功能,增加输出路径参数 -读取字幕文件并将其作为输入发送给AI模型 -处理API调用异常,并提供错误文档链接 --- app/test/test_moviepy.py | 68 +++++++++++++++++++++-------- app/test/test_qwen.py | 93 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 143 insertions(+), 18 deletions(-) create mode 100644 app/test/test_qwen.py diff --git a/app/test/test_moviepy.py b/app/test/test_moviepy.py index 208b708..5b24ebf 100644 --- a/app/test/test_moviepy.py +++ b/app/test/test_moviepy.py @@ -4,6 +4,7 @@ from moviepy.editor import VideoFileClip from datetime import datetime +import os def time_str_to_seconds(time_str: str) -> float: @@ -38,25 +39,56 @@ def cut_video(video_path: str, start_time: str, end_time: str, output_path: str) video_path: 视频文件路径 start_time: 开始时间 (格式: "MM:SS") end_time: 结束时间 (格式: "MM:SS") + output_path: 输出文件路径 """ - # 转换时间字符串为秒数 - start_seconds = time_str_to_seconds(start_time) - end_seconds = time_str_to_seconds(end_time) - - # 加载视频文件 - video = VideoFileClip(video_path) - - # 计算剪辑时长 - clip_duration = end_seconds - start_seconds - print(f"原视频总长度: {format_duration(video.duration)}") - print(f"剪辑时长: {format_duration(clip_duration)}") - - # 剪辑视频 - video = video.subclip(start_seconds, end_seconds) - video.write_videofile("../../resource/videos/cut_video3.mp4") - - # 释放资源 - video.close() + try: + # 确保输出目录存在 + output_dir = os.path.dirname(output_path) + if not os.path.exists(output_dir): + os.makedirs(output_dir) + + # 如果输出文件已存在,先尝试删除 + if os.path.exists(output_path): + try: + os.remove(output_path) + except PermissionError: + print(f"无法删除已存在的文件:{output_path},请确保文件未被其他程序占用") + return + + # 转换时间字符串为秒数 + start_seconds = time_str_to_seconds(start_time) + end_seconds = time_str_to_seconds(end_time) + + # 加载视频文件 + video = VideoFileClip(video_path) + + # 计算剪辑时长 + clip_duration = end_seconds - start_seconds + print(f"原视频总长度: {format_duration(video.duration)}") + print(f"剪辑时长: {format_duration(clip_duration)}") + + # 剪辑视频 + video = video.subclip(start_seconds, end_seconds) + + # 添加错误处理的写入过程 + try: + video.write_videofile( + output_path, + codec='libx264', + audio_codec='aac', + temp_audiofile='temp-audio.m4a', + remove_temp=True + ) + except IOError as e: + print(f"写入视频文件时发生错误:{str(e)}") + raise + finally: + # 确保资源被释放 + video.close() + + except Exception as e: + print(f"视频剪辑过程中发生错误:{str(e)}") + raise if __name__ == "__main__": diff --git a/app/test/test_qwen.py b/app/test/test_qwen.py new file mode 100644 index 0000000..77bca56 --- /dev/null +++ b/app/test/test_qwen.py @@ -0,0 +1,93 @@ +import os +import traceback +import json +from openai import OpenAI +from test_moviepy import cut_video +from app.utils import utils +from app.services.subtitle import extract_audio_and_create_subtitle + + +def chat_with_qwen(prompt: str, system_message: str, subtitle_path: str) -> str: + """ + 与通义千问AI模型进行对话 + + Args: + prompt (str): 用户输入的问题或提示 + system_message (str): 系统提示信息,用于设定AI助手的行为。默认为"You are a helpful assistant." + subtitle_path (str): 字幕文件路径 + Returns: + str: AI助手的回复内容 + + Raises: + Exception: 当API调用失败时抛出异常 + """ + try: + client = OpenAI( + api_key="sk-", + base_url="https://dashscope.aliyuncs.com/compatible-mode/v1", + ) + + # 读取字幕文件 + with open(subtitle_path, "r", encoding="utf-8") as file: + subtitle_content = file.read() + + completion = client.chat.completions.create( + model="qwen-turbo-2024-11-01", + messages=[ + {'role': 'system', 'content': system_message}, + {'role': 'user', 'content': prompt + subtitle_content} + ] + ) + return completion.choices[0].message.content + + except Exception as e: + error_message = f"调用千问API时发生错误:{str(e)}" + print(error_message) + print("请参考文档:https://help.aliyun.com/zh/model-studio/developer-reference/error-code") + raise Exception(error_message) + + +# 使用示例 +if __name__ == "__main__": + try: + # video_path = utils.video_dir("duanju_yuansp.mp4") + # # 判断视频是否存在 + # if not os.path.exists(video_path): + # print(f"视频文件不存在:{video_path}") + # exit(1) + # 提取字幕 + subtitle_path = os.path.join(utils.video_dir(""), f"duanju_yuan.srt") + # extract_audio_and_create_subtitle(video_file=video_path, subtitle_file=subtitle_path) + # 分析字幕 + system_message = """ + 你是一个视频srt字幕分析剪辑器, 输入视频的srt字幕, 分析其中的精彩且尽可能连续的片段并裁剪出来, 注意确保文字与时间戳的正确匹配。 + 输出需严格按照如下 json 格式: + [ + { + "timestamp": "00:50-01:44", + "picture": "画面1", + "narration": "播放原声", + "OST": 0, + "new_timestamp": "00:00-00:54" + }, + { + "timestamp": "01:49-02:30", + "picture": "画面2", + "narration": "播放原声", + "OST": 2, + "new_timestamp": "00:54-01:35" + }, + ] + """ + prompt = "字幕如下:\n" + response = chat_with_qwen(prompt, system_message, subtitle_path) + print(response) + # 保存json,注意json中是时间戳需要转换为 分:秒(现在的时间是 "timestamp": "00:00:00,020-00:00:01,660", 需要转换为 "timestamp": "00:00-01:66") + # response = json.loads(response) + # for item in response: + # item["timestamp"] = item["timestamp"].replace(":", "-") + # with open(os.path.join(utils.video_dir(""), "duanju_yuan.json"), "w", encoding="utf-8") as file: + # json.dump(response, file, ensure_ascii=False) + + except Exception as e: + print(traceback.format_exc()) From 86d398d8fd5eee96d73f8b53fb46d55c606c801e Mon Sep 17 00:00:00 2001 From: linyq Date: Wed, 20 Nov 2024 18:12:45 +0800 Subject: [PATCH 8/9] =?UTF-8?q?feat(audio):=20=E6=94=B9=E8=BF=9B=E9=9F=B3?= =?UTF-8?q?=E9=A2=91=E5=90=88=E5=B9=B6=E5=8A=9F=E8=83=BD=EF=BC=8C=E6=94=AF?= =?UTF-8?q?=E6=8C=81=20OST=20=E8=AE=BE=E7=BD=AE=EF=BC=8C=E6=8F=90=E5=8D=87?= =?UTF-8?q?=E6=97=B6=E9=97=B4=E6=88=B3=E7=B2=BE=E5=BA=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit -重构了 merge_audio_files 函数,增加了对 OST 设置的支持 - 新增 time_to_seconds 函数,支持多种时间格式的转换 - 修改了 audio_merger 模块的逻辑,根据 OST 设置处理音频 - 更新了 task 模块中的 start_subclip 函数,传入 OST 信息 - 优化了 subtitle 和 video 模块的逻辑,适应新的音频处理方式 --- app/models/schema.py | 2 + app/services/audio_merger.py | 174 +++++++++-------- app/services/material.py | 73 +++++-- app/services/task.py | 203 +++++--------------- app/services/video.py | 360 +++++++++++++++++++++++------------ app/test/test_moviepy.py | 53 ++++-- app/test/test_qwen.py | 26 ++- app/utils/utils.py | 72 ++++++- video_pipeline.py | 11 +- webui.txt | 4 +- 10 files changed, 566 insertions(+), 412 deletions(-) diff --git a/app/models/schema.py b/app/models/schema.py index 9d0c5d4..6621772 100644 --- a/app/models/schema.py +++ b/app/models/schema.py @@ -366,6 +366,8 @@ class VideoClipParams(BaseModel): custom_position: float = Field(default=70.0, description="自定义位置") n_threads: Optional[int] = 8 # 线程数,有助于提升视频处理速度 + tts_volume: float = 1.0 # TTS音频音量 + video_volume: float = 0.1 # 视频原声音量 class VideoTranscriptionRequest(BaseModel): video_name: str diff --git a/app/services/audio_merger.py b/app/services/audio_merger.py index f0face0..c7edc77 100644 --- a/app/services/audio_merger.py +++ b/app/services/audio_merger.py @@ -18,95 +18,119 @@ def check_ffmpeg(): return False -def merge_audio_files(task_id: str, audio_file_paths: List[str], total_duration: int, video_script: list): +def merge_audio_files(task_id: str, audio_files: list, total_duration: float, list_script: list): """ - 合并多个音频文件到一个指定总时长的音频文件中,并生成相应的字幕 - :param task_id: 任务ID - :param audio_file_paths: 音频文件路径列表 - :param total_duration: 最终音频文件的总时长(秒) - :param video_script: JSON格式的视频脚本 + 合并音频文件,根据OST设置处理不同的音频轨道 + + Args: + task_id: 任务ID + audio_files: TTS生成的音频文件列表 + total_duration: 总时长 + list_script: 完整脚本信息,包含OST设置 + + Returns: + str: 合并后的音频文件路径 """ - output_dir = utils.task_dir(task_id) - + # 检查FFmpeg是否安装 if not check_ffmpeg(): - logger.error("错误:FFmpeg未安装。请安装FFmpeg后再运行此脚本。") - return None, None + logger.error("FFmpeg未安装,无法合并音频文件") + return None - # 创建一个总时长为total_duration的空白音频 - blank_audio = AudioSegment.silent(duration=total_duration * 1000) # pydub使用毫秒 + # 创建一个空的音频片段 + final_audio = AudioSegment.silent(duration=total_duration * 1000) # 总时长以毫秒为单位 - for audio_path in audio_file_paths: - if not os.path.exists(audio_path): - logger.info(f"警告:文件 {audio_path} 不存在,已跳过。") + # 遍历脚本中的每个片段 + for segment, audio_file in zip(list_script, audio_files): + try: + # 加载TTS音频文件 + tts_audio = AudioSegment.from_file(audio_file) + + # 获取片段的开始和结束时间 + start_time, end_time = segment['new_timestamp'].split('-') + start_seconds = utils.time_to_seconds(start_time) + end_seconds = utils.time_to_seconds(end_time) + + # 根据OST设置处理音频 + if segment['OST'] == 0: + # 只使用TTS音频 + final_audio = final_audio.overlay(tts_audio, position=start_seconds * 1000) + elif segment['OST'] == 1: + # 只使用原声(假设原声已经在视频中) + continue + elif segment['OST'] == 2: + # 混合TTS音频和原声 + original_audio = AudioSegment.silent(duration=(end_seconds - start_seconds) * 1000) + mixed_audio = original_audio.overlay(tts_audio) + final_audio = final_audio.overlay(mixed_audio, position=start_seconds * 1000) + + except Exception as e: + logger.error(f"处理音频文件 {audio_file} 时出错: {str(e)}") continue - # 从文件名中提取时间戳 - filename = os.path.basename(audio_path) - start_time, end_time = extract_timestamp(filename) + # 保存合并后的音频文件 + output_audio_path = os.path.join(utils.task_dir(task_id), "final_audio.mp3") + final_audio.export(output_audio_path, format="mp3") + logger.info(f"合并后的音频文件已保存: {output_audio_path}") - # 读取音频文件 - try: - audio = AudioSegment.from_mp3(audio_path) - except Exception as e: - logger.error(f"错误:无法读取文件 {audio_path}。错误信息:{str(e)}") - continue - - # 将音频插入到空白音频的指定位置 - blank_audio = blank_audio.overlay(audio, position=start_time * 1000) - - # 尝试导出为WAV格式 - try: - output_file = os.path.join(output_dir, "audio.wav") - blank_audio.export(output_file, format="wav") - logger.info(f"音频合并完成,已保存为 {output_file}") - except Exception as e: - logger.info(f"导出为WAV格式失败,尝试使用MP3格式:{str(e)}") - try: - output_file = os.path.join(output_dir, "audio.mp3") - blank_audio.export(output_file, format="mp3", codec="libmp3lame") - logger.info(f"音频合并完成,已保存为 {output_file}") - except Exception as e: - logger.error(f"导出音频失败:{str(e)}") - return None, None - - return output_file - -def parse_timestamp(timestamp: str): - """解析时间戳字符串为秒数""" - # 确保使用冒号作为分隔符 - timestamp = timestamp.replace('_', ':') - return time_to_seconds(timestamp) - -def extract_timestamp(filename): - """从文件名中提取开始和结束时间戳""" - # 从 "audio_00_06-00_24.mp3" 这样的格式中提取时间 - time_part = filename.split('_', 1)[1].split('.')[0] # 获取 "00_06-00_24" 部分 - start_time, end_time = time_part.split('-') # 分割成 "00_06" 和 "00_24" - - # 将下划线格式转换回冒号格式 - start_time = start_time.replace('_', ':') - end_time = end_time.replace('_', ':') - - # 将时间戳转换为秒 - start_seconds = time_to_seconds(start_time) - end_seconds = time_to_seconds(end_time) - - return start_seconds, end_seconds + return output_audio_path def time_to_seconds(time_str): - """将 "00:06" 或 "00_06" 格式转换为总秒数""" - # 确保使用冒号作为分隔符 - time_str = time_str.replace('_', ':') + """ + 将时间字符串转换为秒数,支持多种格式: + 1. 'HH:MM:SS,mmm' (时:分:秒,毫秒) + 2. 'MM:SS,mmm' (分:秒,毫秒) + 3. 'SS,mmm' (秒,毫秒) + """ try: - parts = time_str.split(':') - if len(parts) != 2: - logger.error(f"Invalid time format: {time_str}") - return 0 - return int(parts[0]) * 60 + int(parts[1]) + # 处理毫秒部分 + if ',' in time_str: + time_part, ms_part = time_str.split(',') + ms = float(ms_part) / 1000 + else: + time_part = time_str + ms = 0 + + # 分割时间部分 + parts = time_part.split(':') + + if len(parts) == 3: # HH:MM:SS + h, m, s = map(int, parts) + seconds = h * 3600 + m * 60 + s + elif len(parts) == 2: # MM:SS + m, s = map(int, parts) + seconds = m * 60 + s + else: # SS + seconds = int(parts[0]) + + return seconds + ms except (ValueError, IndexError) as e: logger.error(f"Error parsing time {time_str}: {str(e)}") - return 0 + return 0.0 + + +def extract_timestamp(filename): + """ + 从文件名中提取开始和结束时间戳 + 例如: "audio_00_06,500-00_24,800.mp3" -> (6.5, 24.8) + """ + try: + # 从文件名中提取时间部分 + time_part = filename.split('_', 1)[1].split('.')[0] # 获取 "00_06,500-00_24,800" 部分 + start_time, end_time = time_part.split('-') # 分割成开始和结束时间 + + # 将下划线格式转换回冒号格式 + start_time = start_time.replace('_', ':') + end_time = end_time.replace('_', ':') + + # 将时间戳转换为秒 + start_seconds = time_to_seconds(start_time) + end_seconds = time_to_seconds(end_time) + + return start_seconds, end_seconds + except Exception as e: + logger.error(f"Error extracting timestamp from {filename}: {str(e)}") + return 0.0, 0.0 if __name__ == "__main__": diff --git a/app/services/material.py b/app/services/material.py index 696eda8..5ec6ee4 100644 --- a/app/services/material.py +++ b/app/services/material.py @@ -3,6 +3,7 @@ import subprocess import random import traceback from urllib.parse import urlencode +from datetime import datetime import requests from typing import List @@ -253,34 +254,58 @@ def download_videos( def time_to_seconds(time_str: str) -> float: """ - 将时间字符串转换为秒数 - 支持格式: - 1. "MM:SS" (分:秒) - 2. "SS" (纯秒数) + 将时间字符串转换为秒数,支持多种格式: + 1. 'HH:MM:SS,mmm' (时:分:秒,毫秒) + 2. 'MM:SS' (分:秒) + 3. 'SS' (秒) """ - parts = time_str.split(':') - if len(parts) == 2: - minutes, seconds = map(float, parts) - return minutes * 60 + seconds - return float(time_str) + try: + # 处理毫秒部分 + if ',' in time_str: + time_part, ms_part = time_str.split(',') + ms = int(ms_part) / 1000 + else: + time_part = time_str + ms = 0 + + # 根据格式分别处理 + parts = time_part.split(':') + if len(parts) == 3: # HH:MM:SS + time_obj = datetime.strptime(time_part, "%H:%M:%S") + seconds = time_obj.hour * 3600 + time_obj.minute * 60 + time_obj.second + elif len(parts) == 2: # MM:SS + time_obj = datetime.strptime(time_part, "%M:%S") + seconds = time_obj.minute * 60 + time_obj.second + else: # SS + seconds = float(time_part) + + return seconds + ms + except ValueError as e: + logger.error(f"时间格式错误: {time_str}") + raise ValueError(f"时间格式错误,支持的格式:HH:MM:SS,mmm 或 MM:SS 或 SS") from e def format_timestamp(seconds: float) -> str: """ - 将秒数转换为 "MM:SS" 格式的时间字符串 + 将秒数转换为可读的时间格式 (HH:MM:SS,mmm) """ - minutes = int(seconds) // 60 - secs = int(seconds) % 60 - return f"{minutes:02d}:{secs:02d}" + hours = int(seconds // 3600) + minutes = int((seconds % 3600) // 60) + seconds_remain = seconds % 60 + whole_seconds = int(seconds_remain) + milliseconds = int((seconds_remain - whole_seconds) * 1000) + + return f"{hours:02d}:{minutes:02d}:{whole_seconds:02d},{milliseconds:03d}" def save_clip_video(timestamp: str, origin_video: str, save_dir: str = "") -> dict: """ 保存剪辑后的视频 Args: - timestamp: 需要裁剪的单个时间戳,支持两种格式: - 1. '00:36-00:40' (分:秒-分:秒) - 2. 'SS-SS' (秒-秒) + timestamp: 需要裁剪的单个时间戳,支持格式: + 1. 'HH:MM:SS,mmm-HH:MM:SS,mmm' (时:分:秒,毫秒) + 2. 'MM:SS-MM:SS' (分:秒-分:秒) + 3. 'SS-SS' (秒-秒) origin_video: 原视频路径 save_dir: 存储目录 @@ -293,7 +318,7 @@ def save_clip_video(timestamp: str, origin_video: str, save_dir: str = "") -> di if not os.path.exists(save_dir): os.makedirs(save_dir) - video_id = f"vid-{timestamp.replace(':', '_')}" + video_id = f"vid-{timestamp.replace(':', '_').replace(',', '-')}" video_path = f"{save_dir}/{video_id}.mp4" if os.path.exists(video_path) and os.path.getsize(video_path) > 0: @@ -312,12 +337,12 @@ def save_clip_video(timestamp: str, origin_video: str, save_dir: str = "") -> di # 验证时间段是否有效 if start >= total_duration: - logger.warning(f"起始时间 {format_timestamp(start)} ({start:.2f}秒) 超出视频总时长 {format_timestamp(total_duration)} ({total_duration:.2f}秒)") + logger.warning(f"起始时间 {format_timestamp(start)} ({start:.3f}秒) 超出视频总时长 {format_timestamp(total_duration)} ({total_duration:.3f}秒)") video.close() return {} if end > total_duration: - logger.warning(f"结束时间 {format_timestamp(end)} ({end:.2f}秒) 超出视频总时长 {format_timestamp(total_duration)} ({total_duration:.2f}秒),将自动调整为视频结尾") + logger.warning(f"结束时间 {format_timestamp(end)} ({end:.3f}秒) 超出视频总时长 {format_timestamp(total_duration)} ({total_duration:.3f}秒),将自动调整为视频结尾") end = total_duration if end <= start: @@ -332,7 +357,15 @@ def save_clip_video(timestamp: str, origin_video: str, save_dir: str = "") -> di try: # 检查视频是否有音频轨道并写入文件 - subclip.write_videofile(video_path, audio=(subclip.audio is not None), logger=None) + subclip.write_videofile( + video_path, + codec='libx264', + audio_codec='aac', + temp_audiofile='temp-audio.m4a', + remove_temp=True, + audio=(subclip.audio is not None), + logger=None + ) # 验证生成的视频文件 if os.path.exists(video_path) and os.path.getsize(video_path) > 0: diff --git a/app/services/task.py b/app/services/task.py index c030574..5cd31ed 100644 --- a/app/services/task.py +++ b/app/services/task.py @@ -206,134 +206,14 @@ def generate_final_videos( return final_video_paths, combined_video_paths -def start(task_id, params: VideoParams, stop_at: str = "video"): - logger.info(f"start task: {task_id}, stop_at: {stop_at}") - sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=5) - - if type(params.video_concat_mode) is str: - params.video_concat_mode = VideoConcatMode(params.video_concat_mode) - - # 1. Generate script - video_script = generate_script(task_id, params) - if not video_script: - sm.state.update_task(task_id, state=const.TASK_STATE_FAILED) - return - - sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=10) - - if stop_at == "script": - sm.state.update_task( - task_id, state=const.TASK_STATE_COMPLETE, progress=100, script=video_script - ) - return {"script": video_script} - - # 2. Generate terms - video_terms = "" - if params.video_source != "local": - video_terms = generate_terms(task_id, params, video_script) - if not video_terms: - sm.state.update_task(task_id, state=const.TASK_STATE_FAILED) - return - - save_script_data(task_id, video_script, video_terms, params) - - if stop_at == "terms": - sm.state.update_task( - task_id, state=const.TASK_STATE_COMPLETE, progress=100, terms=video_terms - ) - return {"script": video_script, "terms": video_terms} - - sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=20) - - # 3. Generate audio - audio_file, audio_duration, sub_maker = generate_audio(task_id, params, video_script) - if not audio_file: - sm.state.update_task(task_id, state=const.TASK_STATE_FAILED) - return - - sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=30) - - if stop_at == "audio": - sm.state.update_task( - task_id, - state=const.TASK_STATE_COMPLETE, - progress=100, - audio_file=audio_file, - ) - return {"audio_file": audio_file, "audio_duration": audio_duration} - - # 4. Generate subtitle - subtitle_path = generate_subtitle(task_id, params, video_script, sub_maker, audio_file) - - if stop_at == "subtitle": - sm.state.update_task( - task_id, - state=const.TASK_STATE_COMPLETE, - progress=100, - subtitle_path=subtitle_path, - ) - return {"subtitle_path": subtitle_path} - - sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=40) - - # 5. Get video materials - downloaded_videos = get_video_materials( - task_id, params, video_terms, audio_duration - ) - if not downloaded_videos: - sm.state.update_task(task_id, state=const.TASK_STATE_FAILED) - return - - if stop_at == "materials": - sm.state.update_task( - task_id, - state=const.TASK_STATE_COMPLETE, - progress=100, - materials=downloaded_videos, - ) - return {"materials": downloaded_videos} - - sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=50) - - # 6. Generate final videos - final_video_paths, combined_video_paths = generate_final_videos( - task_id, params, downloaded_videos, audio_file, subtitle_path - ) - - if not final_video_paths: - sm.state.update_task(task_id, state=const.TASK_STATE_FAILED) - return - - logger.success( - f"task {task_id} finished, generated {len(final_video_paths)} videos." - ) - - kwargs = { - "videos": final_video_paths, - "combined_videos": combined_video_paths, - "script": video_script, - "terms": video_terms, - "audio_file": audio_file, - "audio_duration": audio_duration, - "subtitle_path": subtitle_path, - "materials": downloaded_videos, - } - sm.state.update_task( - task_id, state=const.TASK_STATE_COMPLETE, progress=100, **kwargs - ) - return kwargs - - def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: dict): - """ - 后台任务(自动剪辑视频进行剪辑) - - task_id: 任务ID - params: 剪辑参数 - subclip_path_videos: 视频文件路径 - - """ + """后台任务(自动剪辑视频进行剪辑)""" logger.info(f"\n\n## 开始任务: {task_id}") + + # 初始化 ImageMagick + if not utils.init_imagemagick(): + logger.warning("ImageMagick 初始化失败,字幕可能无法正常显示") + sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=5) # tts 角色名称 @@ -341,8 +221,7 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di logger.info("\n\n## 1. 加载视频脚本") video_script_path = path.join(params.video_clip_json_path) - # video_script_path = video_clip_json_path - # 判断json文件是否存在 + if path.exists(video_script_path): try: with open(video_script_path, "r", encoding="utf-8") as f: @@ -355,10 +234,12 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di logger.debug(f"解说完整脚本: \n{video_script}") logger.debug(f"解说 OST 列表: \n{video_ost}") logger.debug(f"解说时间戳列表: \n{time_list}") + # 获取视频总时长(单位 s) - total_duration = list_script[-1]['new_timestamp'] - total_duration = int(total_duration.split("-")[1].split(":")[0]) * 60 + int( - total_duration.split("-")[1].split(":")[1]) + last_timestamp = list_script[-1]['new_timestamp'] + end_time = last_timestamp.split("-")[1] + total_duration = utils.time_to_seconds(end_time) + except Exception as e: logger.error(f"无法读取视频json脚本,请检查配置是否正确。{e}") raise ValueError("无法读取视频json脚本,请检查配置是否正确") @@ -366,32 +247,51 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di logger.error(f"video_script_path: {video_script_path} \n\n", traceback.format_exc()) raise ValueError("解说脚本不存在!请检查配置是否正确。") - logger.info("\n\n## 2. 生成音频列表") - audio_files, sub_maker_list = voice.tts_multiple( - task_id=task_id, - list_script=list_script, - voice_name=voice_name, - voice_rate=params.voice_rate, - voice_pitch=params.voice_pitch, - force_regenerate=True + logger.info("\n\n## 2. 根据OST设置生成音频列表") + # 只为OST=0或2的片段生成TTS音频 + tts_segments = [ + segment for segment in list_script + if segment['OST'] in [0, 2] + ] + logger.debug(f"tts_segments: {tts_segments}") + if tts_segments: + audio_files, sub_maker_list = voice.tts_multiple( + task_id=task_id, + list_script=tts_segments, # 只传入需要TTS的片段 + voice_name=voice_name, + voice_rate=params.voice_rate, + voice_pitch=params.voice_pitch, + force_regenerate=True + ) + if audio_files is None: + sm.state.update_task(task_id, state=const.TASK_STATE_FAILED) + logger.error("TTS转换音频失败, 可能是网络不可用! 如果您在中国, 请使用VPN.") + return + else: + audio_files = [] + + logger.info(f"合并音频文件:\n{audio_files}") + # 传入OST信息以便正确处理音频 + final_audio = audio_merger.merge_audio_files( + task_id=task_id, + audio_files=audio_files, + total_duration=total_duration, + list_script=list_script # 传入完整脚本以便处理OST ) - if audio_files is None: - sm.state.update_task(task_id, state=const.TASK_STATE_FAILED) - logger.error( - "TTS转换音频失败, 可能是网络不可用! 如果您在中国, 请使用VPN.") - return - logger.info(f"合并音频:\n\n {audio_files}") - audio_file = audio_merger.merge_audio_files(task_id, audio_files, total_duration, list_script) sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=30) + # 只为OST=0或2的片段生成字幕 subtitle_path = "" if params.subtitle_enabled: subtitle_path = path.join(utils.task_dir(task_id), f"subtitle.srt") subtitle_provider = config.app.get("subtitle_provider", "").strip().lower() logger.info(f"\n\n## 3. 生成字幕、提供程序是: {subtitle_provider}") - # 使用 faster-whisper-large-v2 模型生成字幕 - subtitle.create(audio_file=audio_file, subtitle_file=subtitle_path) + + subtitle.create( + audio_file=final_audio, + subtitle_file=subtitle_path, + ) subtitle_lines = subtitle.file_to_subtitles(subtitle_path) if not subtitle_lines: @@ -434,14 +334,15 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di final_video_path = path.join(utils.task_dir(task_id), f"final-{index}.mp4") - logger.info(f"\n\n## 6. 最后一步: {index} => {final_video_path}") - # 把所有东西合到在一起 + logger.info(f"\n\n## 6. 最后合成: {index} => {final_video_path}") + # 传入OST信息以便正确处理音频和视频 video.generate_video_v2( video_path=combined_video_path, - audio_path=audio_file, + audio_path=final_audio, subtitle_path=subtitle_path, output_file=final_video_path, params=params, + list_script=list_script # 传入完整脚本以便处理OST ) _progress += 50 / 2 diff --git a/app/services/video.py b/app/services/video.py index 1d270fa..8e6e32d 100644 --- a/app/services/video.py +++ b/app/services/video.py @@ -173,7 +173,7 @@ def wrap_text(text, max_width, font, fontsize=60): if width <= max_width: return text, height - logger.debug(f"换行文本, 最大宽度: {max_width}, 文本宽度: {width}, 文本: {text}") + logger.debug(f"换行文本, 最大宽度: {max_width}, 文本宽度: {width}, 本: {text}") processed = True @@ -228,131 +228,93 @@ def manage_clip(clip): def generate_video_v2( - video_path: str, - audio_path: str, - subtitle_path: str, - output_file: str, - params: Union[VideoParams, VideoClipParams], - progress_callback=None, + video_path: str, + audio_path: str, + subtitle_path: str, + output_file: str, + params: VideoClipParams, + list_script: list = None ): """ - 合并所有素材 + 生成最终视频,处理音频和字幕 + Args: - video_path: 视频路径 - audio_path: 单个音频文件路径 + video_path: 视频文件路径 + audio_path: 音频文件路径 subtitle_path: 字幕文件路径 output_file: 输出文件路径 params: 视频参数 - progress_callback: 进度回调函数,接收 0-100 的进度值 - - Returns: - + list_script: 视频脚本列表,包含OST设置 """ - total_steps = 4 - current_step = 0 - - def update_progress(step_name): - nonlocal current_step - current_step += 1 - if progress_callback: - progress_callback(int(current_step * 100 / total_steps)) - logger.info(f"完成步骤: {step_name}") - try: - validate_params(video_path, audio_path, output_file, params) + video_clip = VideoFileClip(video_path) - with manage_clip(VideoFileClip(video_path)) as video_clip: - aspect = VideoAspect(params.video_aspect) - video_width, video_height = aspect.to_resolution() - - logger.info(f"开始,视频尺寸: {video_width} x {video_height}") - logger.info(f" ① 视频: {video_path}") - logger.info(f" ② 音频: {audio_path}") - logger.info(f" ③ 字幕: {subtitle_path}") - logger.info(f" ④ 输出: {output_file}") - - output_dir = os.path.dirname(output_file) - update_progress("初始化完成") - - # 字体设置 - font_path = "" - if params.subtitle_enabled: - if not params.font_name: - params.font_name = "STHeitiMedium.ttc" - font_path = os.path.join(utils.font_dir(), params.font_name) - if os.name == "nt": - font_path = font_path.replace("\\", "/") - logger.info(f"使用字体: {font_path}") - - def create_text_clip(subtitle_item): - phrase = subtitle_item[1] - max_width = video_width * 0.9 - wrapped_txt, txt_height = wrap_text( - phrase, max_width=max_width, font=font_path, fontsize=params.font_size - ) - _clip = TextClip( - wrapped_txt, - font=font_path, - fontsize=params.font_size, - color=params.text_fore_color, - bg_color=params.text_background_color, - stroke_color=params.stroke_color, - stroke_width=params.stroke_width, - print_cmd=False, - ) - duration = subtitle_item[0][1] - subtitle_item[0][0] - _clip = _clip.set_start(subtitle_item[0][0]) - _clip = _clip.set_end(subtitle_item[0][1]) - _clip = _clip.set_duration(duration) - - if params.subtitle_position == "bottom": - _clip = _clip.set_position(("center", video_height * 0.95 - _clip.h)) - elif params.subtitle_position == "top": - _clip = _clip.set_position(("center", video_height * 0.05)) - elif params.subtitle_position == "custom": - margin = 10 - max_y = video_height - _clip.h - margin - min_y = margin - custom_y = (video_height - _clip.h) * (params.custom_position / 100) - custom_y = max(min_y, min(custom_y, max_y)) - _clip = _clip.set_position(("center", custom_y)) - else: # center - _clip = _clip.set_position(("center", "center")) - return _clip - - update_progress("字体设置完成") - - # 处理音频 - original_audio = video_clip.audio - video_duration = video_clip.duration - new_audio = AudioFileClip(audio_path) - final_audio = process_audio_tracks(original_audio, new_audio, params, video_duration) - update_progress("音频处理完成") - - # 处理字幕 - if subtitle_path and os.path.exists(subtitle_path): - video_clip = process_subtitles(subtitle_path, video_clip, video_duration, create_text_clip) - update_progress("字幕处理完成") - - # 合并音频和导出 - video_clip = video_clip.set_audio(final_audio) - video_clip.write_videofile( - output_file, - audio_codec="aac", - temp_audiofile=os.path.join(output_dir, "temp-audio.m4a"), - threads=params.n_threads, - logger=None, - fps=30, - ) + # 处理音频 + if audio_path and os.path.exists(audio_path): + audio_clip = AudioFileClip(audio_path) - except FileNotFoundError as e: - logger.error(f"文件不存在: {str(e)}") - raise + if list_script: + # 根据OST设置处理音频 + # OST=0: 只使用TTS音频 + # OST=1: 只使用视频原声 + # OST=2: 混合TTS音频和视频原声 + original_audio = video_clip.audio + + # 设置音频音量 + tts_volume = params.tts_volume if hasattr(params, 'tts_volume') else 1.0 + video_volume = params.video_volume if hasattr(params, 'video_volume') else 0.1 + + # 创建最终音频 + if original_audio: + # 有些片段需要原声,有些需要TTS + final_audio = CompositeAudioClip([ + audio_clip.volumex(tts_volume), # TTS音频 + original_audio.volumex(video_volume) # 原声音频 + ]) + else: + final_audio = audio_clip.volumex(tts_volume) + else: + # 如果没有OST设置,使用默认行为 + final_audio = audio_clip + + video_clip = video_clip.set_audio(final_audio) + + # 处理字幕 + if subtitle_path and os.path.exists(subtitle_path): + # 添加字幕 + video_clip = add_subtitles( + video_clip, + subtitle_path, + params.font_size, + params.font_name, + params.text_fore_color, + params.subtitle_position, + params.stroke_color, + params.stroke_width + ) + + # 写入最终视频文件 + video_clip.write_videofile( + output_file, + codec="libx264", + audio_codec="aac", + temp_audiofile="temp-audio.m4a", + remove_temp=True, + threads=params.n_threads + ) + except Exception as e: - logger.error(f"视频生成失败: {str(e)}") - raise + logger.error(f"生成视频时发生错误: {str(e)}") + raise e + finally: - logger.success("完成") + # 清理资源 + if 'video_clip' in locals(): + video_clip.close() + if 'audio_clip' in locals(): + audio_clip.close() + if 'final_audio' in locals(): + final_audio.close() def process_audio_tracks(original_audio, new_audio, params, video_duration): @@ -389,7 +351,7 @@ def process_subtitles(subtitle_path, video_clip, video_duration, create_text_cli for item in sub.subtitles: clip = create_text_clip(subtitle_item=item) - # 时间范围调整 + # 时间范围��整 start_time = max(clip.start, 0) if start_time >= video_duration: continue @@ -450,12 +412,12 @@ def preprocess_video(materials: List[MaterialInfo], clip_duration=4): def combine_clip_videos(combined_video_path: str, - video_paths: List[str], - video_ost_list: List[int], - list_script: list, - video_aspect: VideoAspect = VideoAspect.portrait, - threads: int = 2, - ) -> str: + video_paths: List[str], + video_ost_list: List[int], + list_script: list, + video_aspect: VideoAspect = VideoAspect.portrait, + threads: int = 2, + ) -> str: """ 合并子视频 Args: @@ -469,9 +431,18 @@ def combine_clip_videos(combined_video_path: str, Returns: str: 合并后的视频路径 """ - from app.utils.utils import calculate_total_duration - audio_duration = calculate_total_duration(list_script) - logger.info(f"音频的最大持续时间: {audio_duration} s") + # 计算总时长时需要考虑毫秒精度 + total_duration = 0.0 + for item in list_script: + timestamp = item.get('new_timestamp', '') + if timestamp: + start_str, end_str = timestamp.split('-') + start_time = utils.time_to_seconds(start_str) + end_time = utils.time_to_seconds(end_str) + duration = end_time - start_time + total_duration += duration + + logger.info(f"音频的最大持续时间: {total_duration:.3f} s") output_dir = os.path.dirname(combined_video_path) aspect = VideoAspect(video_aspect) @@ -480,11 +451,17 @@ def combine_clip_videos(combined_video_path: str, clips = [] for video_path, video_ost in zip(video_paths, video_ost_list): try: + # 加载视频片段 clip = VideoFileClip(video_path) + # 根据OST设置处理音频 if video_ost == 0: # 不保留原声 clip = clip.without_audio() - # video_ost 为 1 或 2 时都保留原声,不需要特殊处理 + elif video_ost == 1: # 只保留原声 + # 保持原声,但可能需要调整音量 + if clip.audio: + clip = clip.set_audio(clip.audio.volumex(1.0)) # 可以调整音量系数 + # OST == 2 的情况会在后续处理中混合音频 clip = clip.set_fps(30) @@ -498,6 +475,16 @@ def combine_clip_videos(combined_video_path: str, ) logger.info(f"视频 {video_path} 已调整尺寸为 {video_width} x {video_height}") + # 精确控制视频时长 + filename = os.path.basename(video_path) + timestamp = extract_timestamp_from_filename(filename) + if timestamp: + start_time, end_time = timestamp + clip_duration = end_time - start_time + if abs(clip.duration - clip_duration) > 0.1: # 允许0.1秒的误差 + logger.warning(f"视频 {video_path} 时长与时间戳不匹配,进行调整") + clip = clip.set_duration(clip_duration) + clips.append(clip) except Exception as e: @@ -508,6 +495,7 @@ def combine_clip_videos(combined_video_path: str, raise ValueError("没有有效的视频片段可以合并") try: + # 合并所有视频片段 video_clip = concatenate_videoclips(clips) video_clip = video_clip.set_fps(30) @@ -521,7 +509,7 @@ def combine_clip_videos(combined_video_path: str, temp_audiofile=os.path.join(output_dir, "temp-audio.m4a") ) finally: - # 确保资源被正确���放 + # 确保资源被正确释放 video_clip.close() for clip in clips: clip.close() @@ -530,6 +518,59 @@ def combine_clip_videos(combined_video_path: str, return combined_video_path +def extract_timestamp_from_filename(filename: str) -> tuple: + """ + 从文件名中提取时间戳,支持多种格式: + - "vid-00_06,500-00_24,800.mp4" -> (6.5, 24.8) + - "vid-00_00_00-020-00_00_10-400.mp4" -> (0.02, 10.4) + """ + try: + # 提取时间戳部分 + match = re.search(r'vid-(.+?)\.mp4$', filename) + if not match: + logger.warning(f"文件名格式不正确: {filename}") + return None + + timestamp = match.group(1) + + # 处理包含毫秒的格式 (00_00_00-020-00_00_10-400) + if timestamp.count('-') == 3: + parts = timestamp.split('-') + start_time = f"{parts[0]}-{parts[1]}" # 组合开始时间和毫秒 + end_time = f"{parts[2]}-{parts[3]}" # 组合结束时间和毫秒 + + # 转换开始时间 + start_time_str = start_time.replace('_', ':') + if start_time_str.count(':') == 2: # 如果是 00:00:00-020 格式 + start_base = utils.time_to_seconds(start_time_str.split('-')[0]) + start_ms = int(start_time_str.split('-')[1]) / 1000 + start_seconds = start_base + start_ms + else: + start_seconds = utils.time_to_seconds(start_time_str) + + # 转换结束时间 + end_time_str = end_time.replace('_', ':') + if end_time_str.count(':') == 2: # 如果是 00:00:10-400 格式 + end_base = utils.time_to_seconds(end_time_str.split('-')[0]) + end_ms = int(end_time_str.split('-')[1]) / 1000 + end_seconds = end_base + end_ms + else: + end_seconds = utils.time_to_seconds(end_time_str) + + # 处理简单格式 (00_06-00_24) + else: + start_str, end_str = timestamp.split('-') + start_seconds = utils.time_to_seconds(start_str.replace('_', ':')) + end_seconds = utils.time_to_seconds(end_str.replace('_', ':')) + + logger.debug(f"从文件名 {filename} 提取时间戳: {start_seconds:.3f} - {end_seconds:.3f}") + return start_seconds, end_seconds + + except Exception as e: + logger.error(f"从文件名提取时间戳失败 {filename}: {str(e)}\n{traceback.format_exc()}") + return None + + def resize_video_with_padding(clip, target_width: int, target_height: int): """辅助函数:调整视频尺寸并添加黑边""" clip_ratio = clip.w / clip.h @@ -574,6 +615,71 @@ def validate_params(video_path, audio_path, output_file, params): raise ValueError("params 缺少必要参数 video_aspect") +def add_subtitles(video_clip, subtitle_path, font_size, font_name, font_color, position, shadow_color, shadow_offset): + """ + 为视频添加字幕 + + Args: + video_clip: 视频剪辑对象 + subtitle_path: 字幕文件路径 + font_size: 字体大小 + font_name: 字体名称 + font_color: 字体颜色 + position: 字幕位置 ('top', 'center', 'bottom') + shadow_color: 阴影颜色 + shadow_offset: 阴影偏移 + + Returns: + 带有字幕的视频剪辑对象 + """ + try: + # 确保字体文件存在 + font_path = os.path.join(utils.font_dir(), font_name) + if not os.path.exists(font_path): + logger.error(f"字体文件不存在: {font_path}") + # 尝试使用系统默认字体 + font_path = "Arial" if os.name == 'nt' else "/System/Library/Fonts/STHeiti Light.ttc" + logger.info(f"使用默认字体: {font_path}") + + # 设置字幕位置 + if position == "top": + pos = ("center", 50) + elif position == "center": + pos = "center" + else: # bottom + pos = ("center", -50) + + def subtitle_generator(txt): + return TextClip( + txt, + fontsize=font_size, + font=font_path, + color=font_color, + stroke_color=shadow_color, + stroke_width=shadow_offset, + method='caption', # 使用 caption 方法可能更稳定 + size=(video_clip.w * 0.9, None) # 限制字幕宽度 + ) + + subtitles = SubtitlesClip( + subtitle_path, + subtitle_generator + ) + + # 添加字幕到视频 + video_with_subtitles = CompositeVideoClip([ + video_clip, + subtitles.set_position(pos) + ]) + + return video_with_subtitles + + except Exception as e: + logger.error(f"添加字幕时出错: {str(e)}\n{traceback.format_exc()}") + # 如果添加字幕失败,返回原始视频 + return video_clip + + if __name__ == "__main__": # combined_video_path = "../../storage/tasks/12312312/com123.mp4" # @@ -586,7 +692,7 @@ if __name__ == "__main__": # { # "picture": "夜晚,一个小孩在树林里奔跑,后面有人拿着火把在追赶", # "timestamp": "00:00-00:03", - # "narration": "夜黑风高的树林,一个小孩在拼命奔跑,后面的人穷追不舍!", + # "narration": "夜���风高的树林,一个小孩在拼命奔跑,后面的人穷追不舍!", # "OST": False, # "new_timestamp": "00:00-00:03" # }, diff --git a/app/test/test_moviepy.py b/app/test/test_moviepy.py index 5b24ebf..79d93c2 100644 --- a/app/test/test_moviepy.py +++ b/app/test/test_moviepy.py @@ -1,5 +1,5 @@ """ -使用 moviepy 库剪辑指定时间戳视频 +使用 moviepy 库剪辑指定时间戳视频,支持时分秒毫秒精度 """ from moviepy.editor import VideoFileClip @@ -11,12 +11,22 @@ def time_str_to_seconds(time_str: str) -> float: """ 将时间字符串转换为秒数 参数: - time_str: 格式为"MM:SS"的时间字符串 + time_str: 格式为"HH:MM:SS,mmm"的时间字符串,例如"00:01:23,456" 返回: - 转换后的秒数 + 转换后的秒数(float) """ - time_obj = datetime.strptime(time_str, "%M:%S") - return time_obj.minute * 60 + time_obj.second + try: + # 分离时间和毫秒 + time_part, ms_part = time_str.split(',') + # 转换时分秒 + time_obj = datetime.strptime(time_part, "%H:%M:%S") + # 计算总秒数 + total_seconds = time_obj.hour * 3600 + time_obj.minute * 60 + time_obj.second + # 添加毫秒部分 + total_seconds += int(ms_part) / 1000 + return total_seconds + except ValueError as e: + raise ValueError("时间格式错误,请使用 HH:MM:SS,mmm 格式,例如 00:01:23,456") from e def format_duration(seconds: float) -> str: @@ -25,11 +35,15 @@ def format_duration(seconds: float) -> str: 参数: seconds: 秒数 返回: - 格式化的时间字符串 (MM:SS) + 格式化的时间字符串 (HH:MM:SS,mmm) """ - minutes = int(seconds // 60) - remaining_seconds = int(seconds % 60) - return f"{minutes:02d}:{remaining_seconds:02d}" + hours = int(seconds // 3600) + minutes = int((seconds % 3600) // 60) + seconds_remain = seconds % 60 + whole_seconds = int(seconds_remain) + milliseconds = int((seconds_remain - whole_seconds) * 1000) + + return f"{hours:02d}:{minutes:02d}:{whole_seconds:02d},{milliseconds:03d}" def cut_video(video_path: str, start_time: str, end_time: str, output_path: str) -> None: @@ -37,8 +51,8 @@ def cut_video(video_path: str, start_time: str, end_time: str, output_path: str) 剪辑视频 参数: video_path: 视频文件路径 - start_time: 开始时间 (格式: "MM:SS") - end_time: 结束时间 (格式: "MM:SS") + start_time: 开始时间 (格式: "HH:MM:SS,mmm") + end_time: 结束时间 (格式: "HH:MM:SS,mmm") output_path: 输出文件路径 """ try: @@ -62,10 +76,18 @@ def cut_video(video_path: str, start_time: str, end_time: str, output_path: str) # 加载视频文件 video = VideoFileClip(video_path) + # 验证时间范围 + if start_seconds >= video.duration or end_seconds > video.duration: + raise ValueError(f"剪辑时间超出视频长度!视频总长度为: {format_duration(video.duration)}") + + if start_seconds >= end_seconds: + raise ValueError("结束时间必须大于开始时间!") + # 计算剪辑时长 clip_duration = end_seconds - start_seconds print(f"原视频总长度: {format_duration(video.duration)}") print(f"剪辑时长: {format_duration(clip_duration)}") + print(f"剪辑区间: {start_time} -> {end_time}") # 剪辑视频 video = video.subclip(start_seconds, end_seconds) @@ -92,6 +114,9 @@ def cut_video(video_path: str, start_time: str, end_time: str, output_path: str) if __name__ == "__main__": - # cut_video("E:\\NarratoAI_v0.3.5_cuda\\NarratoAI\storage\\tasks\ca4fee22-350b-47f9-bb2f-802ad96774f7\\final-2.mp4", "00:00", "07:00", "E:\\NarratoAI_v0.3.5_cuda\\NarratoAI\storage\\tasks\\yyjx2-1") - # cut_video("E:\\NarratoAI_v0.3.5_cuda\\NarratoAI\storage\\tasks\ca4fee22-350b-47f9-bb2f-802ad96774f7\\final-2.mp4", "07:00", "14:00", "E:\\NarratoAI_v0.3.5_cuda\\NarratoAI\storage\\tasks\\yyjx2-2") - cut_video("E:\\NarratoAI_v0.3.5_cuda\\NarratoAI\storage\\tasks\ca4fee22-350b-47f9-bb2f-802ad96774f7\\final-2.mp4", "14:00", "22:00", "E:\\NarratoAI_v0.3.5_cuda\\NarratoAI\storage\\tasks\\yyjx2-3") + cut_video( + video_path="/Users/apple/Desktop/NarratoAI/resource/videos/duanju_yuansp.mp4", + start_time="00:00:00,789", + end_time="00:02:00,123", + output_path="/Users/apple/Desktop/NarratoAI/resource/videos/duanju_yuansp_cut3.mp4" + ) diff --git a/app/test/test_qwen.py b/app/test/test_qwen.py index 77bca56..2a69225 100644 --- a/app/test/test_qwen.py +++ b/app/test/test_qwen.py @@ -2,11 +2,23 @@ import os import traceback import json from openai import OpenAI -from test_moviepy import cut_video +from pydantic import BaseModel +from typing import List from app.utils import utils from app.services.subtitle import extract_audio_and_create_subtitle +class Step(BaseModel): + timestamp: str + picture: str + narration: str + OST: int + new_timestamp: str + +class MathReasoning(BaseModel): + result: List[Step] + + def chat_with_qwen(prompt: str, system_message: str, subtitle_path: str) -> str: """ 与通义千问AI模型进行对话 @@ -23,7 +35,7 @@ def chat_with_qwen(prompt: str, system_message: str, subtitle_path: str) -> str: """ try: client = OpenAI( - api_key="sk-", + api_key="sk-a1acd853d88d41d3ae92777d7bfa2612", base_url="https://dashscope.aliyuncs.com/compatible-mode/v1", ) @@ -50,25 +62,25 @@ def chat_with_qwen(prompt: str, system_message: str, subtitle_path: str) -> str: # 使用示例 if __name__ == "__main__": try: - # video_path = utils.video_dir("duanju_yuansp.mp4") + video_path = utils.video_dir("duanju_yuansp.mp4") # # 判断视频是否存在 # if not os.path.exists(video_path): # print(f"视频文件不存在:{video_path}") # exit(1) # 提取字幕 subtitle_path = os.path.join(utils.video_dir(""), f"duanju_yuan.srt") - # extract_audio_and_create_subtitle(video_file=video_path, subtitle_file=subtitle_path) + extract_audio_and_create_subtitle(video_file=video_path, subtitle_file=subtitle_path) # 分析字幕 system_message = """ 你是一个视频srt字幕分析剪辑器, 输入视频的srt字幕, 分析其中的精彩且尽可能连续的片段并裁剪出来, 注意确保文字与时间戳的正确匹配。 - 输出需严格按照如下 json 格式: + 输出需严格按照如下 json 格式: [ { - "timestamp": "00:50-01:44", + "timestamp": "00:00:50,020-00,01:44,000", "picture": "画面1", "narration": "播放原声", "OST": 0, - "new_timestamp": "00:00-00:54" + "new_timestamp": "00:00:00,000-00:00:54,020" }, { "timestamp": "01:49-02:30", diff --git a/app/utils/utils.py b/app/utils/utils.py index 307823c..e864341 100644 --- a/app/utils/utils.py +++ b/app/utils/utils.py @@ -40,7 +40,7 @@ def to_json(obj): # 如果对象是二进制数据,转换为base64编码的字符串 elif isinstance(o, bytes): return "*** binary data ***" - # 如果对象是字典,递归处理每个键值对 + # 如果���象是字典,递归处理每个键值对 elif isinstance(o, dict): return {k: serialize(v) for k, v in o.items()} # 如果对象是列表或元组,递归处理每个元素 @@ -302,15 +302,49 @@ def get_current_country(): def time_to_seconds(time_str: str) -> float: - parts = time_str.split(':') - if len(parts) == 2: - m, s = map(float, parts) - return m * 60 + s - elif len(parts) == 3: - h, m, s = map(float, parts) - return h * 3600 + m * 60 + s - else: - raise ValueError(f"Invalid time format: {time_str}") + """ + 将时间字符串转换为秒数,支持多种格式: + - "HH:MM:SS,mmm" -> 小时:分钟:秒,毫秒 + - "MM:SS,mmm" -> 分钟:秒,毫秒 + - "SS,mmm" -> 秒,毫秒 + - "SS-mmm" -> 秒-毫秒 + + Args: + time_str: 时间字符串 + + Returns: + float: 转换后的秒数(包含毫秒) + """ + try: + # 处理带有'-'的毫秒格式 + if '-' in time_str: + time_part, ms_part = time_str.split('-') + ms = float(ms_part) / 1000 + # 处理带有','的毫秒格式 + elif ',' in time_str: + time_part, ms_part = time_str.split(',') + ms = float(ms_part) / 1000 + else: + time_part = time_str + ms = 0 + + # 分割时间部分 + parts = time_part.split(':') + + if len(parts) == 3: # HH:MM:SS + h, m, s = map(float, parts) + seconds = h * 3600 + m * 60 + s + elif len(parts) == 2: # MM:SS + m, s = map(float, parts) + seconds = m * 60 + s + else: # SS + seconds = float(parts[0]) + + return seconds + ms + + except (ValueError, IndexError) as e: + logger.error(f"时间格式转换错误 {time_str}: {str(e)}") + return 0.0 def seconds_to_time(seconds: float) -> str: @@ -520,3 +554,21 @@ def download_font(url: str, font_path: str): except Exception as e: logger.error(f"下载字体文件失败: {e}") raise + +def init_imagemagick(): + """初始化 ImageMagick 配置""" + try: + # 检查 ImageMagick 是否已安装 + import subprocess + result = subprocess.run(['magick', '-version'], capture_output=True, text=True) + if result.returncode != 0: + logger.error("ImageMagick 未安装或配置不正确") + return False + + # 设置 IMAGEMAGICK_BINARY 环境变量 + os.environ['IMAGEMAGICK_BINARY'] = 'magick' + + return True + except Exception as e: + logger.error(f"初始化 ImageMagick 失败: {str(e)}") + return False diff --git a/video_pipeline.py b/video_pipeline.py index 5dca576..dc7fa26 100644 --- a/video_pipeline.py +++ b/video_pipeline.py @@ -93,10 +93,8 @@ class VideoPipeline: response.raise_for_status() return response.json() - def save_script_to_json(self, script: list, script_name: str) -> str: - """保存脚本到json文件""" - script_path = f"E:\\projects\\NarratoAI\\resource\\scripts\\{script_name}.json" - + def save_script_to_json(self, script: list, script_path: str) -> str: + """保存脚本到json文件""" try: with open(script_path, 'w', encoding='utf-8') as f: json.dump(script, f, ensure_ascii=False, indent=2) @@ -133,8 +131,7 @@ class VideoPipeline: # 2.2 保存脚本到json文件 print("保存脚本到json文件...") - script_path = self.save_script_to_json(script, script_name) - script_result["script_path"] = script_path + self.save_script_to_json(script=script, script_path=script_path) # 3. 剪辑视频 print("开始剪辑视频...") @@ -143,7 +140,7 @@ class VideoPipeline: # 4. 生成最终视频 print("开始生成最终视频...") - final_result = self.generate_final_video( + self.generate_final_video( task_id=task_id, video_path=video_path, script_path=script_path, diff --git a/webui.txt b/webui.txt index b64b320..c8d66c9 100644 --- a/webui.txt +++ b/webui.txt @@ -369,4 +369,6 @@ output_path和script参数需要传递给请求3 } } subclip_videos和 output_path和script参数需要传递给请求4 -最后完成工作流 \ No newline at end of file +最后完成工作流 + +0代表只播放文案音频,禁用视频原声;1代表只播放视频原声,不需要播放文案音频和字幕;2代表即播放文案音频也要播放视频原声; \ No newline at end of file From 73729dcb7b7116ae858ee023f6d86d786a269c38 Mon Sep 17 00:00:00 2001 From: linyq Date: Wed, 20 Nov 2024 18:32:34 +0800 Subject: [PATCH 9/9] =?UTF-8?q?feat(utils):=20=E4=BC=98=E5=8C=96=E6=97=B6?= =?UTF-8?q?=E9=97=B4=E6=88=B3=E5=A4=84=E7=90=86=E5=B9=B6=E6=94=AF=E6=8C=81?= =?UTF-8?q?=E6=AF=AB=E7=A7=92=E7=BA=A7=E7=B2=BE=E5=BA=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 重构了时间戳转换函数,支持 HH:MM:SS,mmm 格式 - 优化了时间戳计算逻辑,提高了精度- 更新了相关服务和工具类中的时间戳处理方法 --- app/services/script_service.py | 59 +++++++++++++++++++++++++--------- app/utils/script_generator.py | 49 +++++++++++++++++++++------- app/utils/vision_analyzer.py | 29 ++++++++++++++--- 3 files changed, 105 insertions(+), 32 deletions(-) diff --git a/app/services/script_service.py b/app/services/script_service.py index 1693cbc..d42a232 100644 --- a/app/services/script_service.py +++ b/app/services/script_service.py @@ -35,7 +35,7 @@ class ScriptGenerator: video_theme: 视频主题 custom_prompt: 自定义提示词 skip_seconds: 跳过开始的秒数 - threshold: 差异阈值 + threshold: 差异���值 vision_batch_size: 视觉处理批次大小 vision_llm_provider: 视觉模型提供商 progress_callback: 进度回调函数 @@ -177,7 +177,7 @@ class ScriptGenerator: batch_files = self._get_batch_files(keyframe_files, result, vision_batch_size) first_timestamp, last_timestamp, _ = self._get_batch_timestamps(batch_files, prev_batch_files) - # 添加带时间戳的分析结果 + # 添加带时间戳的分��结果 frame_analysis += f"\n=== {first_timestamp}-{last_timestamp} ===\n" frame_analysis += result['response'] frame_analysis += "\n" @@ -214,7 +214,7 @@ class ScriptGenerator: progress_callback(90, "正在生成文案...") - # 获取文本生成配置 + # 获取文本生��配置 text_provider = config.app.get('text_llm_provider', 'gemini').lower() text_api_key = config.app.get(f'text_{text_provider}_api_key') text_model = config.app.get(f'text_{text_provider}_model_name') @@ -286,7 +286,7 @@ class ScriptGenerator: task_data = response.json() task_id = task_data["data"].get('task_id') if not task_id: - raise Exception(f"无效的API响应: {response.text}") + raise Exception(f"无效的API��应: {response.text}") progress_callback(50, "正在等待分析结果...") retry_count = 0 @@ -342,10 +342,10 @@ class ScriptGenerator: batch_files: List[str], prev_batch_files: List[str] = None ) -> tuple[str, str, str]: - """获取一批文件的时间戳范围""" + """获取一批文件的时间戳范围,支持毫秒级精度""" if not batch_files: logger.warning("Empty batch files") - return "00:00", "00:00", "00:00-00:00" + return "00:00:00,000", "00:00:00,000", "00:00:00,000-00:00:00,000" if len(batch_files) == 1 and prev_batch_files and len(prev_batch_files) > 0: first_frame = os.path.basename(prev_batch_files[-1]) @@ -358,18 +358,45 @@ class ScriptGenerator: last_time = last_frame.split('_')[2].replace('.jpg', '') def format_timestamp(time_str: str) -> str: - if len(time_str) < 4: - logger.warning(f"Invalid timestamp format: {time_str}") - return "00:00" + """将时间字符串转换为 HH:MM:SS,mmm 格式""" + try: + if len(time_str) < 4: + logger.warning(f"Invalid timestamp format: {time_str}") + return "00:00:00,000" - minutes = int(time_str[-4:-2]) - seconds = int(time_str[-2:]) - - if seconds >= 60: - minutes += seconds // 60 - seconds = seconds % 60 + # 处理毫秒部分 + if ',' in time_str: + time_part, ms_part = time_str.split(',') + ms = int(ms_part) + else: + time_part = time_str + ms = 0 - return f"{minutes:02d}:{seconds:02d}" + # 处理时分秒 + parts = time_part.split(':') + if len(parts) == 3: # HH:MM:SS + h, m, s = map(int, parts) + elif len(parts) == 2: # MM:SS + h = 0 + m, s = map(int, parts) + else: # SS + h = 0 + m = 0 + s = int(parts[0]) + + # 处理进位 + if s >= 60: + m += s // 60 + s = s % 60 + if m >= 60: + h += m // 60 + m = m % 60 + + return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}" + + except Exception as e: + logger.error(f"时间戳格式转换错误 {time_str}: {str(e)}") + return "00:00:00,000" first_timestamp = format_timestamp(first_time) last_timestamp = format_timestamp(last_time) diff --git a/app/utils/script_generator.py b/app/utils/script_generator.py index e36064a..9005e32 100644 --- a/app/utils/script_generator.py +++ b/app/utils/script_generator.py @@ -406,22 +406,47 @@ class ScriptProcessor: def _save_results(self, frame_content_list: List[Dict]): """保存处理结果,并添加新的时间戳""" try: - # 转换秒数为 MM:SS 格式 - def seconds_to_time(seconds): - minutes = seconds // 60 - remaining_seconds = seconds % 60 - return f"{minutes:02d}:{remaining_seconds:02d}" + def format_timestamp(seconds: float) -> str: + """将秒数转换为 HH:MM:SS,mmm 格式""" + hours = int(seconds // 3600) + minutes = int((seconds % 3600) // 60) + seconds_remainder = seconds % 60 + whole_seconds = int(seconds_remainder) + milliseconds = int((seconds_remainder - whole_seconds) * 1000) + + return f"{hours:02d}:{minutes:02d}:{whole_seconds:02d},{milliseconds:03d}" # 计算新的时间戳 - current_time = 0 # 当前时间点(秒) + current_time = 0.0 # 当前时间点(秒,包含毫秒) for frame in frame_content_list: # 获取原始时间戳的持续时间 start_str, end_str = frame['timestamp'].split('-') - def time_to_seconds(time_str): - minutes, seconds = map(int, time_str.split(':')) - return minutes * 60 + seconds + def time_to_seconds(time_str: str) -> float: + """将时间字符串转换为秒数(包含毫秒)""" + try: + if ',' in time_str: + time_part, ms_part = time_str.split(',') + ms = float(ms_part) / 1000 + else: + time_part = time_str + ms = 0 + + parts = time_part.split(':') + if len(parts) == 3: # HH:MM:SS + h, m, s = map(float, parts) + seconds = h * 3600 + m * 60 + s + elif len(parts) == 2: # MM:SS + m, s = map(float, parts) + seconds = m * 60 + s + else: # SS + seconds = float(parts[0]) + + return seconds + ms + except Exception as e: + logger.error(f"时间格式转换错误 {time_str}: {str(e)}") + return 0.0 # 计算当前片段的持续时间 start_seconds = time_to_seconds(start_str) @@ -429,8 +454,8 @@ class ScriptProcessor: duration = end_seconds - start_seconds # 设置新的时间戳 - new_start = seconds_to_time(current_time) - new_end = seconds_to_time(current_time + duration) + new_start = format_timestamp(current_time) + new_end = format_timestamp(current_time + duration) frame['new_timestamp'] = f"{new_start}-{new_end}" # 更新当前时间点 @@ -443,7 +468,7 @@ class ScriptProcessor: with open(file_name, 'w', encoding='utf-8') as file: json.dump(frame_content_list, file, ensure_ascii=False, indent=4) - logger.info(f"保存脚本成功,总时长: {seconds_to_time(current_time)}") + logger.info(f"保存脚本成功,总时长: {format_timestamp(current_time)}") except Exception as e: logger.error(f"保存结果时发生错误: {str(e)}\n{traceback.format_exc()}") diff --git a/app/utils/vision_analyzer.py b/app/utils/vision_analyzer.py index 06342d7..07306c5 100644 --- a/app/utils/vision_analyzer.py +++ b/app/utils/vision_analyzer.py @@ -10,6 +10,7 @@ from google.api_core import exceptions import google.generativeai as genai import PIL.Image import traceback +from app.utils import utils class VisionAnalyzer: @@ -146,14 +147,34 @@ class VisionAnalyzer: response_text = result['response'] image_paths = result['image_paths'] - img_name_start = Path(image_paths[0]).stem.split('_')[-1] - img_name_end = Path(image_paths[-1]).stem.split('_')[-1] - txt_path = os.path.join(output_dir, f"frame_{img_name_start}_{img_name_end}.txt") + # 从文件名中提取时间戳并转换为标准格式 + def format_timestamp(img_path): + # 从文件名中提取时间部分 + timestamp = Path(img_path).stem.split('_')[-1] + try: + # 将时间转换为秒 + seconds = utils.time_to_seconds(timestamp.replace('_', ':')) + # 转换为 HH:MM:SS,mmm 格式 + hours = int(seconds // 3600) + minutes = int((seconds % 3600) // 60) + seconds_remainder = seconds % 60 + whole_seconds = int(seconds_remainder) + milliseconds = int((seconds_remainder - whole_seconds) * 1000) + + return f"{hours:02d}:{minutes:02d}:{whole_seconds:02d},{milliseconds:03d}" + except Exception as e: + logger.error(f"时间戳格式转换错误: {timestamp}, {str(e)}") + return timestamp + + start_timestamp = format_timestamp(image_paths[0]) + end_timestamp = format_timestamp(image_paths[-1]) + + txt_path = os.path.join(output_dir, f"frame_{start_timestamp}_{end_timestamp}.txt") # 保存结果到txt文件 with open(txt_path, 'w', encoding='utf-8') as f: f.write(response_text.strip()) - print(f"已保存分析结果到: {txt_path}") + logger.info(f"已保存分析结果到: {txt_path}") def load_images(self, image_paths: List[str]) -> List[PIL.Image.Image]: """