Merge pull request #71 from linyqh/dev_v2

0.3.9新版本发布
This commit is contained in:
viccy 2024-12-06 18:46:34 +08:00 committed by GitHub
commit 894ba13026
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
45 changed files with 3946 additions and 1256 deletions

12
.gitignore vendored
View File

@ -23,8 +23,12 @@ node_modules
# 模型目录
/models/
./models/*
resource/scripts/*
resource/videos/*
resource/songs/*
resource/fonts/*
resource/scripts/*.json
resource/videos/*.mp4
resource/songs/*.mp3
resource/songs/*.flac
resource/fonts/*.ttc
resource/fonts/*.ttf
resource/fonts/*.otf
resource/srt/*.srt
app/models/faster-whisper-large-v2/*

View File

@ -43,6 +43,9 @@ NarratoAI 是一个自动化影视解说工具基于LLM实现文案撰写、
- [x] 发布 0.3.5 整合包
- [ ] 支持阿里 Qwen2-VL 大模型理解视频
- [ ] 支持短剧解说
- [x] 合并素材
- [ ] 一键转录
- [ ] 一键清理缓存
- [ ] ...
## 配置要求 📦

View File

@ -163,109 +163,109 @@ def delete_video(request: Request, task_id: str = Path(..., description="Task ID
)
@router.get(
    "/musics", response_model=BgmRetrieveResponse, summary="Retrieve local BGM files"
)
def get_bgm_list(request: Request):
    """Return the ``*.mp3`` files found directly inside the songs directory.

    :param request: incoming request (not read by this handler)
    :return: ``utils.get_response(200, {"files": [...]})`` where each entry
        carries the file's base name, its size in bytes and its full path
    """
    pattern = os.path.join(utils.song_dir(), "*.mp3")
    tracks = [
        {
            "name": os.path.basename(path),
            "size": os.path.getsize(path),
            "file": path,
        }
        for path in glob.glob(pattern)
    ]
    return utils.get_response(200, {"files": tracks})
# @router.get(
# "/musics", response_model=BgmRetrieveResponse, summary="Retrieve local BGM files"
# )
# def get_bgm_list(request: Request):
# suffix = "*.mp3"
# song_dir = utils.song_dir()
# files = glob.glob(os.path.join(song_dir, suffix))
# bgm_list = []
# for file in files:
# bgm_list.append(
# {
# "name": os.path.basename(file),
# "size": os.path.getsize(file),
# "file": file,
# }
# )
# response = {"files": bgm_list}
# return utils.get_response(200, response)
#
@router.post(
    "/musics",
    response_model=BgmUploadResponse,
    summary="Upload the BGM file to the songs directory",
)
def upload_bgm_file(request: Request, file: UploadFile = File(...)):
    """Store an uploaded ``.mp3`` file in the songs directory.

    An existing file with the same name is overwritten.

    :param request: incoming request, used to derive the request id
    :param file: the uploaded file
    :return: ``utils.get_response(200, {"file": <saved path>})``
    :raises HttpException: status 400 when the upload is not a ``*.mp3`` file
    """
    request_id = base.get_task_id(request)
    # The filename is client-controlled: keep only its last path component so
    # values such as "../../x.mp3" or "C:\\x.mp3" cannot leave the songs dir.
    safe_name = os.path.basename((file.filename or "").replace("\\", "/"))
    # Require a real ".mp3" extension and a non-empty stem
    if safe_name.endswith(".mp3") and safe_name != ".mp3":
        save_path = os.path.join(utils.song_dir(), safe_name)
        with open(save_path, "wb") as buffer:
            file.file.seek(0)
            # Copy in chunks instead of loading the whole upload into memory
            for chunk in iter(lambda: file.file.read(1024 * 1024), b""):
                buffer.write(chunk)
        return utils.get_response(200, {"file": save_path})

    raise HttpException(
        "", status_code=400, message=f"{request_id}: Only *.mp3 files can be uploaded"
    )
@router.get("/stream/{file_path:path}")
async def stream_video(request: Request, file_path: str):
    """Stream a file from the task directory, honouring one HTTP byte range.

    :param request: incoming request; only its ``Range`` header is read
    :param file_path: path of the video relative to the task directory
    :return: ``StreamingResponse`` (``video/mp4``) with ``Content-Range``,
        ``Accept-Ranges`` and ``Content-Length`` headers set
    :raises HttpException: 404 when the path leaves the task directory or is
        not a file, 400 for a malformed ``Range`` header, 416 when the
        requested range cannot be satisfied
    """
    tasks_dir = utils.task_dir()
    video_path = os.path.join(tasks_dir, file_path)

    # file_path comes straight from the URL: an absolute path or ".." segments
    # would otherwise let a client read any file on the host.
    root = os.path.normcase(os.path.join(os.path.realpath(tasks_dir), ""))
    resolved = os.path.normcase(os.path.realpath(video_path))
    if not resolved.startswith(root) or not os.path.isfile(resolved):
        raise HttpException(
            "", status_code=404, message=f"video not found: {file_path}"
        )

    video_size = os.path.getsize(video_path)
    start, end = 0, video_size - 1

    range_header = request.headers.get("Range")
    if range_header:
        try:
            # Only the first range of a multi-range header is served
            spec = range_header.split("bytes=", 1)[1].split(",", 1)[0].strip()
            first, _, last = spec.partition("-")
            if first:
                start = int(first)
                if last:
                    end = int(last)
            else:
                # Suffix form "bytes=-N": the last N bytes of the file
                start = max(video_size - int(last), 0)
        except (IndexError, ValueError) as exc:
            raise HttpException(
                "", status_code=400, message=f"invalid Range header: {range_header}"
            ) from exc
        # A client may ask past the end of the file; serve what exists
        end = min(end, video_size - 1)
        if start < 0 or start > end:
            raise HttpException(
                "",
                status_code=416,
                message=f"range not satisfiable: {range_header}",
            )
    length = end - start + 1

    def file_iterator(path, offset, byte_count):
        # Yield exactly byte_count bytes starting at offset, 4 KiB at a time
        with open(path, "rb") as f:
            f.seek(offset, os.SEEK_SET)
            remaining = byte_count
            while remaining > 0:
                data = f.read(min(4096, remaining))
                if not data:
                    break
                remaining -= len(data)
                yield data

    response = StreamingResponse(
        file_iterator(video_path, start, length), media_type="video/mp4"
    )
    response.headers["Content-Range"] = f"bytes {start}-{end}/{video_size}"
    response.headers["Accept-Ranges"] = "bytes"
    response.headers["Content-Length"] = str(length)
    # Kept as 206 for every request (with or without Range), as before
    response.status_code = 206  # Partial Content

    return response
@router.get("/download/{file_path:path}")
async def download_video(_: Request, file_path: str):
    """
    Send a file from the task directory as a download attachment.

    :param _: Request request (unused)
    :param file_path: video file path relative to the task directory,
        eg: cd1727ed-3473-42a2-a7da-4faafafec72b/final-1.mp4
    :return: ``FileResponse`` whose media type is ``video/<extension>``
    :raises HttpException: 404 when the path leaves the task directory or is
        not an existing file
    """
    tasks_dir = utils.task_dir()
    video_path = os.path.join(tasks_dir, file_path)

    # file_path comes straight from the URL: an absolute path or ".." segments
    # would otherwise let a client download any file on the host.
    root = os.path.normcase(os.path.join(os.path.realpath(tasks_dir), ""))
    resolved = os.path.normcase(os.path.realpath(video_path))
    if not resolved.startswith(root) or not os.path.isfile(resolved):
        raise HttpException(
            "", status_code=404, message=f"video not found: {file_path}"
        )

    # Separate name so the file_path parameter is no longer shadowed
    target = pathlib.Path(video_path)
    filename = target.stem
    extension = target.suffix
    headers = {"Content-Disposition": f"attachment; filename={filename}{extension}"}
    return FileResponse(
        path=video_path,
        headers=headers,
        filename=f"{filename}{extension}",
        media_type=f"video/{extension[1:]}",
    )
# @router.post(
# "/musics",
# response_model=BgmUploadResponse,
# summary="Upload the BGM file to the songs directory",
# )
# def upload_bgm_file(request: Request, file: UploadFile = File(...)):
# request_id = base.get_task_id(request)
# # check file ext
# if file.filename.endswith("mp3"):
# song_dir = utils.song_dir()
# save_path = os.path.join(song_dir, file.filename)
# # save file
# with open(save_path, "wb+") as buffer:
# # If the file already exists, it will be overwritten
# file.file.seek(0)
# buffer.write(file.file.read())
# response = {"file": save_path}
# return utils.get_response(200, response)
#
# raise HttpException(
# "", status_code=400, message=f"{request_id}: Only *.mp3 files can be uploaded"
# )
#
#
# @router.get("/stream/{file_path:path}")
# async def stream_video(request: Request, file_path: str):
# tasks_dir = utils.task_dir()
# video_path = os.path.join(tasks_dir, file_path)
# range_header = request.headers.get("Range")
# video_size = os.path.getsize(video_path)
# start, end = 0, video_size - 1
#
# length = video_size
# if range_header:
# range_ = range_header.split("bytes=")[1]
# start, end = [int(part) if part else None for part in range_.split("-")]
# if start is None:
# start = video_size - end
# end = video_size - 1
# if end is None:
# end = video_size - 1
# length = end - start + 1
#
# def file_iterator(file_path, offset=0, bytes_to_read=None):
# with open(file_path, "rb") as f:
# f.seek(offset, os.SEEK_SET)
# remaining = bytes_to_read or video_size
# while remaining > 0:
# bytes_to_read = min(4096, remaining)
# data = f.read(bytes_to_read)
# if not data:
# break
# remaining -= len(data)
# yield data
#
# response = StreamingResponse(
# file_iterator(video_path, start, length), media_type="video/mp4"
# )
# response.headers["Content-Range"] = f"bytes {start}-{end}/{video_size}"
# response.headers["Accept-Ranges"] = "bytes"
# response.headers["Content-Length"] = str(length)
# response.status_code = 206 # Partial Content
#
# return response
#
#
# @router.get("/download/{file_path:path}")
# async def download_video(_: Request, file_path: str):
# """
# download video
# :param _: Request request
# :param file_path: video file path, eg: /cd1727ed-3473-42a2-a7da-4faafafec72b/final-1.mp4
# :return: video file
# """
# tasks_dir = utils.task_dir()
# video_path = os.path.join(tasks_dir, file_path)
# file_path = pathlib.Path(video_path)
# filename = file_path.stem
# extension = file_path.suffix
# headers = {"Content-Disposition": f"attachment; filename={filename}{extension}"}
# return FileResponse(
# path=video_path,
# headers=headers,
# filename=f"{filename}{extension}",
# media_type=f"video/{extension[1:]}",
# )

View File

@ -0,0 +1,11 @@
from fastapi import APIRouter, Depends
def v2_router(dependencies=None):
    """Create the ``APIRouter`` used by the v2 controllers.

    The router is tagged ``V2`` and prefixed with ``/api/v2``.

    Args:
        dependencies: optional dependencies (e.g. authentication) to assign
            to the router; a falsy value leaves the router's own default.

    Returns:
        The configured ``APIRouter`` instance.
    """
    api = APIRouter()
    api.tags = ["V2"]
    api.prefix = "/api/v2"
    if not dependencies:
        return api
    # Apply the authentication dependencies to every route of this router
    api.dependencies = dependencies
    return api

View File

@ -0,0 +1,170 @@
from fastapi import APIRouter, BackgroundTasks
from loguru import logger
import os
from app.models.schema_v2 import (
GenerateScriptRequest,
GenerateScriptResponse,
CropVideoRequest,
CropVideoResponse,
DownloadVideoRequest,
DownloadVideoResponse,
StartSubclipRequest,
StartSubclipResponse
)
from app.models.schema import VideoClipParams
from app.services.script_service import ScriptGenerator
from app.services.video_service import VideoService
from app.utils import utils
from app.controllers.v2.base import v2_router
from app.models.schema import VideoClipParams
from app.services.youtube_service import YoutubeService
from app.services import task as task_service
router = v2_router()
@router.post(
    "/scripts/generate",
    response_model=GenerateScriptResponse,
    summary="同步请求;生成视频脚本 (V2)"
)
async def generate_script(
    request: GenerateScriptRequest,
    background_tasks: BackgroundTasks
):
    """
    V2 endpoint that generates a video script.

    A fresh task id is created, the request fields are forwarded to
    ``ScriptGenerator.generate_script`` and the result is returned together
    with that id. Any failure is logged and re-raised unchanged.
    """
    new_task_id = utils.get_uuid()

    try:
        script_generator = ScriptGenerator()
        generated = await script_generator.generate_script(
            video_path=request.video_path,
            video_theme=request.video_theme,
            custom_prompt=request.custom_prompt,
            skip_seconds=request.skip_seconds,
            threshold=request.threshold,
            vision_batch_size=request.vision_batch_size,
            vision_llm_provider=request.vision_llm_provider
        )
        payload = {"task_id": new_task_id, "script": generated}
        return payload
    except Exception as e:
        logger.exception(f"Generate script failed: {str(e)}")
        raise
@router.post(
    "/scripts/crop",
    response_model=CropVideoResponse,
    summary="同步请求;裁剪视频 (V2)"
)
async def crop_video(
    request: CropVideoRequest,
    background_tasks: BackgroundTasks
):
    """
    V2 endpoint that crops a video according to a script.

    Delegates to ``VideoService.crop_video`` and returns the task id and the
    sub-clips it reports. Any failure is logged and re-raised unchanged.
    """
    try:
        # Hand the source video and its script to the cropping service
        service = VideoService()
        cropped = await service.crop_video(
            video_path=request.video_origin_path,
            video_script=request.video_script
        )
        task_id, subclip_videos = cropped
        logger.debug(f"裁剪视频成功,视频片段路径: {subclip_videos}")
        logger.debug(type(subclip_videos))
        payload = {"task_id": task_id, "subclip_videos": subclip_videos}
        return payload
    except Exception as e:
        logger.exception(f"Crop video failed: {str(e)}")
        raise
@router.post(
    "/youtube/download",
    response_model=DownloadVideoResponse,
    summary="同步请求下载YouTube视频 (V2)"
)
async def download_youtube_video(
    request: DownloadVideoRequest,
    background_tasks: BackgroundTasks
):
    """
    Download a YouTube video at the requested resolution.

    Delegates to ``YoutubeService.download_video`` and echoes the requested
    resolution and format next to the service's task id, output path and
    filename. Any failure is logged and re-raised unchanged.
    """
    try:
        downloader = YoutubeService()
        outcome = await downloader.download_video(
            url=request.url,
            resolution=request.resolution,
            output_format=request.output_format,
            rename=request.rename
        )
        task_id, output_path, filename = outcome

        payload = {
            "task_id": task_id,
            "output_path": output_path,
            "resolution": request.resolution,
            "format": request.output_format,
            "filename": filename
        }
        return payload
    except Exception as e:
        logger.exception(f"Download YouTube video failed: {str(e)}")
        raise
@router.post(
    "/scripts/start-subclip",
    response_model=StartSubclipResponse,
    summary="异步请求;开始视频剪辑任务 (V2)"
)
async def start_subclip(
    request: VideoClipParams,
    task_id: str,
    subclip_videos: dict,
    background_tasks: BackgroundTasks
):
    """
    V2 endpoint that starts a video-editing task in the background.

    A new ``VideoClipParams`` is built from a subset of the request fields,
    ``task_service.start_subclip`` is queued as a background task, and the
    task id is returned with the initial state ``PROCESSING``. Any failure
    is logged and re-raised unchanged.
    """
    try:
        # Build the parameter object from the fields this endpoint forwards
        clip_params = VideoClipParams(
            video_origin_path=request.video_origin_path,
            video_clip_json_path=request.video_clip_json_path,
            voice_name=request.voice_name,
            voice_rate=request.voice_rate,
            voice_pitch=request.voice_pitch,
            subtitle_enabled=request.subtitle_enabled,
            video_aspect=request.video_aspect,
            n_threads=request.n_threads
        )

        # Run the actual editing work as a background task
        background_tasks.add_task(
            task_service.start_subclip,
            task_id=task_id,
            params=clip_params,
            subclip_path_videos=subclip_videos
        )

        # "PROCESSING" is the initial state reported to the caller
        accepted = {"task_id": task_id, "state": "PROCESSING"}
        return accepted
    except Exception as e:
        logger.exception(f"Start subclip task failed: {str(e)}")
        raise

View File

@ -366,6 +366,8 @@ class VideoClipParams(BaseModel):
custom_position: float = Field(default=70.0, description="自定义位置")
n_threads: Optional[int] = 8 # 线程数,有助于提升视频处理速度
tts_volume: float = 1.0 # TTS音频音量
video_volume: float = 0.1 # 视频原声音量
class VideoTranscriptionRequest(BaseModel):
video_name: str

62
app/models/schema_v2.py Normal file
View File

@ -0,0 +1,62 @@
from typing import Optional, List
from pydantic import BaseModel
class GenerateScriptRequest(BaseModel):
    """Request body of the v2 script-generation endpoint."""
    # Path of the video file to analyse
    video_path: str
    # Theme of the video
    video_theme: Optional[str] = ""
    # Custom prompt supplied by the caller
    custom_prompt: Optional[str] = ""
    # Number of seconds to skip at the start of the video
    skip_seconds: Optional[int] = 0
    # Difference threshold — presumably used for keyframe detection; confirm
    threshold: Optional[int] = 30
    # Batch size used by the vision model when analysing frames
    vision_batch_size: Optional[int] = 5
    # Vision model provider name
    vision_llm_provider: Optional[str] = "gemini"
class GenerateScriptResponse(BaseModel):
    """Response body of the v2 script-generation endpoint."""
    # Identifier generated for this request
    task_id: str
    # Generated script, one dict per entry
    script: List[dict]
class CropVideoRequest(BaseModel):
    """Request body of the v2 crop-video endpoint."""
    # Path of the source video to crop
    video_origin_path: str
    # Script describing what to crop, one dict per entry
    video_script: List[dict]
class CropVideoResponse(BaseModel):
    """Response body of the v2 crop-video endpoint."""
    # Identifier of the cropping task
    task_id: str
    # Cropped clips — presumably keyed by timestamp range with the clip path
    # as value; confirm against the video service
    subclip_videos: dict
class DownloadVideoRequest(BaseModel):
    """Request body of the v2 YouTube download endpoint."""
    # URL of the video to download
    url: str
    # Requested resolution
    resolution: str
    # Output container format
    output_format: Optional[str] = "mp4"
    # Optional new name for the downloaded file
    rename: Optional[str] = None
class DownloadVideoResponse(BaseModel):
    """Response body of the v2 YouTube download endpoint."""
    # Identifier of the download task
    task_id: str
    # Path the video was written to
    output_path: str
    # Resolution that was requested
    resolution: str
    # Output format that was requested
    format: str
    # Name of the downloaded file
    filename: str
class StartSubclipRequest(BaseModel):
    """Parameters for starting a video-editing (subclip) task."""
    # Identifier of the task to run
    task_id: str
    # Path of the source video
    video_origin_path: str
    # Path of the JSON file describing the clips
    video_clip_json_path: str
    # Voice used for narration
    voice_name: Optional[str] = None
    # Voice rate adjustment
    voice_rate: Optional[int] = 0
    # Voice pitch adjustment
    voice_pitch: Optional[int] = 0
    # Whether subtitles are enabled
    subtitle_enabled: Optional[bool] = True
    # Aspect ratio of the output video
    video_aspect: Optional[str] = "16:9"
    # Number of worker threads
    n_threads: Optional[int] = 4
    # Clips obtained from the crop-video endpoint.
    # NOTE(review): the original comment calls this a dict, but the field is
    # annotated as list while CropVideoResponse.subclip_videos is a dict — confirm.
    subclip_videos: list
class StartSubclipResponse(BaseModel):
    """Response body returned when a subclip task is started."""
    # Identifier of the started task
    task_id: str
    # Task state, e.g. "PROCESSING" right after the task is queued
    state: str
    # Optional list of video paths — presumably the produced videos; confirm
    videos: Optional[List[str]] = None
    # Optional list of combined video paths — presumably the merged results; confirm
    combined_videos: Optional[List[str]] = None

View File

@ -10,8 +10,12 @@ Resources:
from fastapi import APIRouter
from app.controllers.v1 import llm, video
from app.controllers.v2 import script
# Root router that aggregates the routers of every API version
root_api_router = APIRouter()
# v1
root_api_router.include_router(video.router)
root_api_router.include_router(llm.router)
# v2
root_api_router.include_router(script.router)

View File

@ -18,95 +18,119 @@ def check_ffmpeg():
return False
def merge_audio_files(task_id: str, audio_file_paths: List[str], total_duration: int, video_script: list):
def merge_audio_files(task_id: str, audio_files: list, total_duration: float, list_script: list):
"""
合并多个音频文件到一个指定总时长的音频文件中并生成相应的字幕
:param task_id: 任务ID
:param audio_file_paths: 音频文件路径列表
:param total_duration: 最终音频文件的总时长
:param video_script: JSON格式的视频脚本
合并音频文件根据OST设置处理不同的音频轨道
Args:
task_id: 任务ID
audio_files: TTS生成的音频文件列表
total_duration: 总时长
list_script: 完整脚本信息包含OST设置
Returns:
str: 合并后的音频文件路径
"""
output_dir = utils.task_dir(task_id)
# 检查FFmpeg是否安装
if not check_ffmpeg():
logger.error("错误FFmpeg未安装。请安装FFmpeg后再运行此脚本。")
return None, None
logger.error("FFmpeg未安装无法合并音频文件")
return None
# 创建一个总时长为total_duration的空白音频
blank_audio = AudioSegment.silent(duration=total_duration * 1000) # pydub使用毫秒
# 创建一个空的音频片段
final_audio = AudioSegment.silent(duration=total_duration * 1000) # 总时长以毫秒为单位
for audio_path in audio_file_paths:
if not os.path.exists(audio_path):
logger.info(f"警告:文件 {audio_path} 不存在,已跳过。")
# 遍历脚本中的每个片段
for segment, audio_file in zip(list_script, audio_files):
try:
# 加载TTS音频文件
tts_audio = AudioSegment.from_file(audio_file)
# 获取片段的开始和结束时间
start_time, end_time = segment['new_timestamp'].split('-')
start_seconds = utils.time_to_seconds(start_time)
end_seconds = utils.time_to_seconds(end_time)
# 根据OST设置处理音频
if segment['OST'] == 0:
# 只使用TTS音频
final_audio = final_audio.overlay(tts_audio, position=start_seconds * 1000)
elif segment['OST'] == 1:
# 只使用原声(假设原声已经在视频中)
continue
elif segment['OST'] == 2:
# 混合TTS音频和原声
original_audio = AudioSegment.silent(duration=(end_seconds - start_seconds) * 1000)
mixed_audio = original_audio.overlay(tts_audio)
final_audio = final_audio.overlay(mixed_audio, position=start_seconds * 1000)
except Exception as e:
logger.error(f"处理音频文件 {audio_file} 时出错: {str(e)}")
continue
# 从文件名中提取时间戳
filename = os.path.basename(audio_path)
start_time, end_time = extract_timestamp(filename)
# 保存合并后的音频文件
output_audio_path = os.path.join(utils.task_dir(task_id), "final_audio.mp3")
final_audio.export(output_audio_path, format="mp3")
logger.info(f"合并后的音频文件已保存: {output_audio_path}")
# 读取音频文件
try:
audio = AudioSegment.from_mp3(audio_path)
except Exception as e:
logger.error(f"错误:无法读取文件 {audio_path}。错误信息:{str(e)}")
continue
# 将音频插入到空白音频的指定位置
blank_audio = blank_audio.overlay(audio, position=start_time * 1000)
# 尝试导出为WAV格式
try:
output_file = os.path.join(output_dir, "audio.wav")
blank_audio.export(output_file, format="wav")
logger.info(f"音频合并完成,已保存为 {output_file}")
except Exception as e:
logger.info(f"导出为WAV格式失败尝试使用MP3格式{str(e)}")
try:
output_file = os.path.join(output_dir, "audio.mp3")
blank_audio.export(output_file, format="mp3", codec="libmp3lame")
logger.info(f"音频合并完成,已保存为 {output_file}")
except Exception as e:
logger.error(f"导出音频失败:{str(e)}")
return None, None
return output_file
def parse_timestamp(timestamp: str):
"""解析时间戳字符串为秒数"""
# 确保使用冒号作为分隔符
timestamp = timestamp.replace('_', ':')
return time_to_seconds(timestamp)
def extract_timestamp(filename):
"""从文件名中提取开始和结束时间戳"""
# 从 "audio_00_06-00_24.mp3" 这样的格式中提取时间
time_part = filename.split('_', 1)[1].split('.')[0] # 获取 "00_06-00_24" 部分
start_time, end_time = time_part.split('-') # 分割成 "00_06" 和 "00_24"
# 将下划线格式转换回冒号格式
start_time = start_time.replace('_', ':')
end_time = end_time.replace('_', ':')
# 将时间戳转换为秒
start_seconds = time_to_seconds(start_time)
end_seconds = time_to_seconds(end_time)
return start_seconds, end_seconds
return output_audio_path
def time_to_seconds(time_str):
""""00:06""00_06" 格式转换为总秒数"""
# 确保使用冒号作为分隔符
time_str = time_str.replace('_', ':')
"""
将时间字符串转换为秒数支持多种格式
1. 'HH:MM:SS,mmm' (::,毫秒)
2. 'MM:SS,mmm' (:,毫秒)
3. 'SS,mmm' (,毫秒)
"""
try:
parts = time_str.split(':')
if len(parts) != 2:
logger.error(f"Invalid time format: {time_str}")
return 0
return int(parts[0]) * 60 + int(parts[1])
# 处理毫秒部分
if ',' in time_str:
time_part, ms_part = time_str.split(',')
ms = float(ms_part) / 1000
else:
time_part = time_str
ms = 0
# 分割时间部分
parts = time_part.split(':')
if len(parts) == 3: # HH:MM:SS
h, m, s = map(int, parts)
seconds = h * 3600 + m * 60 + s
elif len(parts) == 2: # MM:SS
m, s = map(int, parts)
seconds = m * 60 + s
else: # SS
seconds = int(parts[0])
return seconds + ms
except (ValueError, IndexError) as e:
logger.error(f"Error parsing time {time_str}: {str(e)}")
return 0
return 0.0
def extract_timestamp(filename):
    """
    Extract the start and end timestamps encoded in an audio file name.

    The part after the first underscore and before the first dot is split on
    ``-`` into a start and an end; underscores are turned back into colons and
    each side is converted with ``time_to_seconds``.
    Example (from the original documentation):
    "audio_00_06,500-00_24,800.mp3" -> (6.5, 24.8)

    Returns:
        tuple: (start_seconds, end_seconds); (0.0, 0.0) when parsing fails,
        in which case the error is logged.
    """
    try:
        # e.g. "00_06,500-00_24,800"
        stem = filename.split('_', 1)[1].split('.')[0]
        raw_start, raw_end = stem.split('-')

        # Underscores stand in for colons inside file names
        return (
            time_to_seconds(raw_start.replace('_', ':')),
            time_to_seconds(raw_end.replace('_', ':')),
        )
    except Exception as e:
        logger.error(f"Error extracting timestamp from {filename}: {str(e)}")
        return 0.0, 0.0
if __name__ == "__main__":

View File

@ -3,6 +3,7 @@ import subprocess
import random
import traceback
from urllib.parse import urlencode
from datetime import datetime
import requests
from typing import List
@ -254,70 +255,105 @@ def download_videos(
def time_to_seconds(time_str: str) -> float:
"""
将时间字符串转换为秒数
支持格式
1. "MM:SS" (:)
2. "SS" (纯秒数)
支持格式: 'HH:MM:SS,mmm' (::,毫秒)
Args:
time_str: 时间字符串, "00:00:20,100"
Returns:
float: 转换后的秒数(包含毫秒)
"""
parts = time_str.split(':')
if len(parts) == 2:
minutes, seconds = map(float, parts)
return minutes * 60 + seconds
return float(time_str)
try:
# 处理毫秒部分
if ',' in time_str:
time_part, ms_part = time_str.split(',')
ms = int(ms_part) / 1000
else:
time_part = time_str
ms = 0
# 处理时分秒
parts = time_part.split(':')
if len(parts) == 3: # HH:MM:SS
h, m, s = map(int, parts)
seconds = h * 3600 + m * 60 + s
else:
raise ValueError("时间格式必须为 HH:MM:SS,mmm")
return seconds + ms
except ValueError as e:
logger.error(f"时间格式错误: {time_str}")
raise ValueError(f"时间格式错误: 必须为 HH:MM:SS,mmm 格式") from e
def format_timestamp(seconds: float) -> str:
"""
将秒数转换为 "MM:SS" 格式的时间字符串
将秒数转换为可读的时间格式 (HH:MM:SS,mmm)
Args:
seconds: 秒数(可包含毫秒)
Returns:
str: 格式化的时间字符串, "00:00:20,100"
"""
minutes = int(seconds) // 60
secs = int(seconds) % 60
return f"{minutes:02d}:{secs:02d}"
hours = int(seconds // 3600)
minutes = int((seconds % 3600) // 60)
seconds_remain = seconds % 60
whole_seconds = int(seconds_remain)
milliseconds = int((seconds_remain - whole_seconds) * 1000)
return f"{hours:02d}:{minutes:02d}:{whole_seconds:02d},{milliseconds:03d}"
def save_clip_video(timestamp: str, origin_video: str, save_dir: str = "") -> dict:
"""
保存剪辑后的视频
Args:
timestamp: 需要裁剪的单个时间戳支持两种格式
1. '00:36-00:40' (:-:)
2. 'SS-SS' (-)
timestamp: 需要裁剪的时间戳,格式为 'HH:MM:SS,mmm-HH:MM:SS,mmm'
例如: '00:00:00,000-00:00:20,100'
origin_video: 原视频路径
save_dir: 存储目录
Returns:
裁剪后的视频路径格式为 {timestamp: video_path}
dict: 裁剪后的视频路径,格式为 {timestamp: video_path}
"""
# 使用新的路径结构
if not save_dir:
save_dir = utils.storage_dir("cache_videos")
base_dir = os.path.join(utils.temp_dir(), "clip_video")
video_hash = utils.md5(origin_video)
save_dir = os.path.join(base_dir, video_hash)
if not os.path.exists(save_dir):
os.makedirs(save_dir)
video_id = f"vid-{timestamp.replace(':', '_')}"
video_path = f"{save_dir}/{video_id}.mp4"
# 生成更规范的视频文件名
video_id = f"vid-{timestamp.replace(':', '-').replace(',', '_')}"
video_path = os.path.join(save_dir, f"{video_id}.mp4")
if os.path.exists(video_path) and os.path.getsize(video_path) > 0:
logger.info(f"video already exists: {video_path}")
return {timestamp: video_path}
try:
# 加载视频获取总时长
# 加载视频获取总时长
video = VideoFileClip(origin_video)
total_duration = video.duration
# 获取目标时间段
# 解析时间戳
start_str, end_str = timestamp.split('-')
start = time_to_seconds(start_str)
end = time_to_seconds(end_str)
# 验证时间段是否有效
# 验证时间段
if start >= total_duration:
logger.warning(f"起始时间 {format_timestamp(start)} ({start:.2f}秒) 超出视频总时长 {format_timestamp(total_duration)} ({total_duration:.2f}秒)")
logger.warning(f"起始时间 {format_timestamp(start)} ({start:.3f}秒) 超出视频总时长 {format_timestamp(total_duration)} ({total_duration:.3f}秒)")
video.close()
return {}
if end > total_duration:
logger.warning(f"结束时间 {format_timestamp(end)} ({end:.2f}秒) 超出视频总时长 {format_timestamp(total_duration)} ({total_duration:.2f}秒),将自动调整为视频结尾")
logger.warning(f"结束时间 {format_timestamp(end)} ({end:.3f}秒) 超出视频总时长 {format_timestamp(total_duration)} ({total_duration:.3f}秒),将自动调整为视频结尾")
end = total_duration
if end <= start:
@ -328,11 +364,21 @@ def save_clip_video(timestamp: str, origin_video: str, save_dir: str = "") -> di
# 剪辑视频
duration = end - start
logger.info(f"开始剪辑视频: {format_timestamp(start)} - {format_timestamp(end)},时长 {format_timestamp(duration)}")
# 剪辑视频
subclip = video.subclip(start, end)
try:
# 检查视频是否有音频轨道并写入文件
subclip.write_videofile(video_path, audio=(subclip.audio is not None), logger=None)
subclip.write_videofile(
video_path,
codec='libx264',
audio_codec='aac',
temp_audiofile='temp-audio.m4a',
remove_temp=True,
audio=(subclip.audio is not None),
logger=None
)
# 验证生成的视频文件
if os.path.exists(video_path) and os.path.getsize(video_path) > 0:
@ -363,12 +409,12 @@ def save_clip_video(timestamp: str, origin_video: str, save_dir: str = "") -> di
return {}
def clip_videos(task_id: str, timestamp_terms: List[str], origin_video: str, progress_callback=None):
def clip_videos(task_id: str, timestamp_terms: List[str], origin_video: str, progress_callback=None) -> dict:
"""
剪辑视频
Args:
task_id: 任务id
timestamp_terms: 需要剪辑的时间戳列表:['00:00-00:20', '00:36-00:40', '07:07-07:22']
timestamp_terms: 需要剪辑的时间戳列表:['00:00:00,000-00:00:20,100', '00:00:43,039-00:00:46,959']
origin_video: 原视频路径
progress_callback: 进度回调函数
@ -379,11 +425,6 @@ def clip_videos(task_id: str, timestamp_terms: List[str], origin_video: str, pro
total_items = len(timestamp_terms)
for index, item in enumerate(timestamp_terms):
material_directory = config.app.get("material_directory", "").strip()
if material_directory == "task":
material_directory = utils.task_dir(task_id)
elif material_directory and not os.path.isdir(material_directory):
material_directory = ""
try:
saved_video_path = save_clip_video(timestamp=item, origin_video=origin_video, save_dir=material_directory)
if saved_video_path:
@ -396,6 +437,7 @@ def clip_videos(task_id: str, timestamp_terms: List[str], origin_video: str, pro
except Exception as e:
logger.error(f"视频裁剪失败: {utils.to_json(item)} =>\n{str(traceback.format_exc())}")
return {}
logger.success(f"裁剪 {len(video_paths)} videos")
return video_paths
@ -455,29 +497,3 @@ def merge_videos(video_paths, ost_list):
os.remove(silent_video)
return output_file
# 使用示例
# if __name__ == "__main__":
# video_paths = ['/Users/apple/Desktop/home/NarratoAI/storage/cache_videos/vid-01_17-01_37.mp4', '/Users/apple/Desktop/home/NarratoAI/storage/cache_videos/vid-00_00-00_06.mp4',
# '/Users/apple/Desktop/home/NarratoAI/storage/cache_videos/vid-00_06-00_09.mp4', '/Users/apple/Desktop/home/NarratoAI/storage/cache_videos/vid-01_03-01_10.mp4',
# '/Users/apple/Desktop/home/NarratoAI/storage/cache_videos/vid-01_10-01_17.mp4', '/Users/apple/Desktop/home/NarratoAI/storage/cache_videos/vid-00_24-00_27.mp4',
# '/Users/apple/Desktop/home/NarratoAI/storage/cache_videos/vid-01_28-01_36.mp4', '/Users/apple/Desktop/home/NarratoAI/storage/cache_videos/vid-00_32-00_41.mp4',
# '/Users/apple/Desktop/home/NarratoAI/storage/cache_videos/vid-01_36-01_58.mp4', '/Users/apple/Desktop/home/NarratoAI/storage/cache_videos/vid-00_12-00_15.mp4',
# '/Users/apple/Desktop/home/NarratoAI/storage/cache_videos/vid-00_09-00_12.mp4', '/Users/apple/Desktop/home/NarratoAI/storage/cache_videos/vid-02_12-02_25.mp4',
# '/Users/apple/Desktop/home/NarratoAI/storage/cache_videos/vid-02_03-02_12.mp4', '/Users/apple/Desktop/home/NarratoAI/storage/cache_videos/vid-01_58-02_03.mp4',
# '/Users/apple/Desktop/home/NarratoAI/storage/cache_videos/vid-03_14-03_18.mp4', '/Users/apple/Desktop/home/NarratoAI/storage/cache_videos/vid-03_18-03_20.mp4']
#
# ost_list = [True, False, False, False, False, False, False, False, True, False, False, False, False, False, False,
# False]
#
# result = merge_videos(video_paths, ost_list)
# if result:
# print(f"合并后的视频文件:{result}")
# else:
# print("视频合并失败")
#
if __name__ == "__main__":
save_clip_video('00:50-01:41', 'E:\\projects\\NarratoAI\\resource\\videos\\WeChat_20241110144511.mp4')

View File

@ -0,0 +1,405 @@
import os
import json
import time
import asyncio
import requests
from loguru import logger
from typing import List, Dict, Any, Callable
from app.utils import utils, gemini_analyzer, video_processor, video_processor_v2
from app.utils.script_generator import ScriptProcessor
from app.config import config
class ScriptGenerator:
def __init__(self):
self.temp_dir = utils.temp_dir()
self.keyframes_dir = os.path.join(self.temp_dir, "keyframes")
async def generate_script(
self,
video_path: str,
video_theme: str = "",
custom_prompt: str = "",
skip_seconds: int = 0,
threshold: int = 30,
vision_batch_size: int = 5,
vision_llm_provider: str = "gemini",
progress_callback: Callable[[float, str], None] = None
) -> List[Dict[Any, Any]]:
"""
生成视频脚本的核心逻辑
Args:
video_path: 视频文件路径
video_theme: 视频主题
custom_prompt: 自定义提示词
skip_seconds: 跳过开始的秒数
threshold: 差异<EFBFBD><EFBFBD><EFBFBD>
vision_batch_size: 视觉处理批次大小
vision_llm_provider: 视觉模型提供商
progress_callback: 进度回调函数
Returns:
List[Dict]: 生成的视频脚本
"""
if progress_callback is None:
progress_callback = lambda p, m: None
try:
# 提取关键帧
progress_callback(10, "正在提取关键帧...")
keyframe_files = await self._extract_keyframes(
video_path,
skip_seconds,
threshold
)
if vision_llm_provider == "gemini":
script = await self._process_with_gemini(
keyframe_files,
video_theme,
custom_prompt,
vision_batch_size,
progress_callback
)
elif vision_llm_provider == "narratoapi":
script = await self._process_with_narrato(
keyframe_files,
video_theme,
custom_prompt,
vision_batch_size,
progress_callback
)
else:
raise ValueError(f"Unsupported vision provider: {vision_llm_provider}")
return json.loads(script) if isinstance(script, str) else script
except Exception as e:
logger.exception("Generate script failed")
raise
async def _extract_keyframes(
self,
video_path: str,
skip_seconds: int,
threshold: int
) -> List[str]:
"""提取视频关键帧"""
video_hash = utils.md5(video_path + str(os.path.getmtime(video_path)))
video_keyframes_dir = os.path.join(self.keyframes_dir, video_hash)
# 检查缓存
keyframe_files = []
if os.path.exists(video_keyframes_dir):
for filename in sorted(os.listdir(video_keyframes_dir)):
if filename.endswith('.jpg'):
keyframe_files.append(os.path.join(video_keyframes_dir, filename))
if keyframe_files:
logger.info(f"Using cached keyframes: {video_keyframes_dir}")
return keyframe_files
# 提取新的关键帧
os.makedirs(video_keyframes_dir, exist_ok=True)
try:
if config.frames.get("version") == "v2":
processor = video_processor_v2.VideoProcessor(video_path)
processor.process_video_pipeline(
output_dir=video_keyframes_dir,
skip_seconds=skip_seconds,
threshold=threshold
)
else:
processor = video_processor.VideoProcessor(video_path)
processor.process_video(
output_dir=video_keyframes_dir,
skip_seconds=skip_seconds
)
for filename in sorted(os.listdir(video_keyframes_dir)):
if filename.endswith('.jpg'):
keyframe_files.append(os.path.join(video_keyframes_dir, filename))
return keyframe_files
except Exception as e:
if os.path.exists(video_keyframes_dir):
import shutil
shutil.rmtree(video_keyframes_dir)
raise
async def _process_with_gemini(
self,
keyframe_files: List[str],
video_theme: str,
custom_prompt: str,
vision_batch_size: int,
progress_callback: Callable[[float, str], None]
) -> str:
"""使用Gemini处理视频帧"""
progress_callback(30, "正在初始化视觉分析器...")
# 获取Gemini配置
vision_api_key = config.app.get("vision_gemini_api_key")
vision_model = config.app.get("vision_gemini_model_name")
if not vision_api_key or not vision_model:
raise ValueError("未配置 Gemini API Key 或者模型")
analyzer = gemini_analyzer.VisionAnalyzer(
model_name=vision_model,
api_key=vision_api_key,
)
progress_callback(40, "正在分析关键帧...")
# 执行异步分析
results = await analyzer.analyze_images(
images=keyframe_files,
prompt=config.app.get('vision_analysis_prompt'),
batch_size=vision_batch_size
)
progress_callback(60, "正在整理分析结果...")
# 合并所有批次的分析结果
frame_analysis = ""
prev_batch_files = None
for result in results:
if 'error' in result:
logger.warning(f"批次 {result['batch_index']} 处理出现警告: {result['error']}")
continue
batch_files = self._get_batch_files(keyframe_files, result, vision_batch_size)
first_timestamp, last_timestamp, _ = self._get_batch_timestamps(batch_files, prev_batch_files)
# 添加带时间戳的分<E79A84><E58886>结果
frame_analysis += f"\n=== {first_timestamp}-{last_timestamp} ===\n"
frame_analysis += result['response']
frame_analysis += "\n"
prev_batch_files = batch_files
if not frame_analysis.strip():
raise Exception("未能生成有效的帧分析结果")
progress_callback(70, "正在生成脚本...")
# 构建帧内容列表
frame_content_list = []
prev_batch_files = None
for result in results:
if 'error' in result:
continue
batch_files = self._get_batch_files(keyframe_files, result, vision_batch_size)
_, _, timestamp_range = self._get_batch_timestamps(batch_files, prev_batch_files)
frame_content = {
"timestamp": timestamp_range,
"picture": result['response'],
"narration": "",
"OST": 2
}
frame_content_list.append(frame_content)
prev_batch_files = batch_files
if not frame_content_list:
raise Exception("没有有效的帧内容可以处理")
progress_callback(90, "正在生成文案...")
# 获取文本生<E69CAC><E7949F>配置
text_provider = config.app.get('text_llm_provider', 'gemini').lower()
text_api_key = config.app.get(f'text_{text_provider}_api_key')
text_model = config.app.get(f'text_{text_provider}_model_name')
processor = ScriptProcessor(
model_name=text_model,
api_key=text_api_key,
prompt=custom_prompt,
video_theme=video_theme
)
return processor.process_frames(frame_content_list)
async def _process_with_narrato(
self,
keyframe_files: List[str],
video_theme: str,
custom_prompt: str,
vision_batch_size: int,
progress_callback: Callable[[float, str], None]
) -> str:
"""使用NarratoAPI处理视频帧"""
# 创建临时目录
temp_dir = utils.temp_dir("narrato")
# 打包关键帧
progress_callback(30, "正在打包关键帧...")
zip_path = os.path.join(temp_dir, f"keyframes_{int(time.time())}.zip")
try:
if not utils.create_zip(keyframe_files, zip_path):
raise Exception("打包关键帧失败")
# 获取API配置
api_url = config.app.get("narrato_api_url")
api_key = config.app.get("narrato_api_key")
if not api_key:
raise ValueError("未配置 Narrato API Key")
headers = {
'X-API-Key': api_key,
'accept': 'application/json'
}
api_params = {
'batch_size': vision_batch_size,
'use_ai': False,
'start_offset': 0,
'vision_model': config.app.get('narrato_vision_model', 'gemini-1.5-flash'),
'vision_api_key': config.app.get('narrato_vision_key'),
'llm_model': config.app.get('narrato_llm_model', 'qwen-plus'),
'llm_api_key': config.app.get('narrato_llm_key'),
'custom_prompt': custom_prompt
}
progress_callback(40, "正在上传文件...")
with open(zip_path, 'rb') as f:
files = {'file': (os.path.basename(zip_path), f, 'application/x-zip-compressed')}
response = requests.post(
f"{api_url}/video/analyze",
headers=headers,
params=api_params,
files=files,
timeout=30
)
response.raise_for_status()
task_data = response.json()
task_id = task_data["data"].get('task_id')
if not task_id:
raise Exception(f"无效的API<EFBFBD><EFBFBD>应: {response.text}")
progress_callback(50, "正在等待分析结果...")
retry_count = 0
max_retries = 60
while retry_count < max_retries:
try:
status_response = requests.get(
f"{api_url}/video/tasks/{task_id}",
headers=headers,
timeout=10
)
status_response.raise_for_status()
task_status = status_response.json()['data']
if task_status['status'] == 'SUCCESS':
return task_status['result']['data']
elif task_status['status'] in ['FAILURE', 'RETRY']:
raise Exception(f"任务失败: {task_status.get('error')}")
retry_count += 1
time.sleep(2)
except requests.RequestException as e:
logger.warning(f"获取任务状态失败,重试中: {str(e)}")
retry_count += 1
time.sleep(2)
continue
raise Exception("任务执行超时")
finally:
# 清理临时文件
try:
if os.path.exists(zip_path):
os.remove(zip_path)
except Exception as e:
logger.warning(f"清理临时文件失败: {str(e)}")
def _get_batch_files(
        self,
        keyframe_files: List[str],
        result: Dict[str, Any],
        batch_size: int
) -> List[str]:
    """Return the keyframe files that belong to the batch described by ``result``.

    Args:
        keyframe_files: Keyframe image paths for the whole video.
        result: Batch result mapping; its ``'batch_index'`` entry is multiplied
            by ``batch_size`` to find where the batch starts.
        batch_size: Number of files per batch.

    Returns:
        The slice of ``keyframe_files`` for that batch; it is shorter than
        ``batch_size`` when the list ends first.
    """
    offset = result['batch_index'] * batch_size
    # Slicing already clamps the upper bound to the length of the list.
    return keyframe_files[offset:offset + batch_size]
def _get_batch_timestamps(
        self,
        batch_files: List[str],
        prev_batch_files: List[str] = None
) -> tuple[str, str, str]:
    """Derive the timestamp range covered by a batch of keyframe files.

    The timestamp of a frame is taken from its file name: the third
    ``_``-separated field with ``.jpg`` removed. Each timestamp is normalised
    to ``HH:MM:SS,mmm``.

    Args:
        batch_files: Keyframe paths of the current batch.
        prev_batch_files: Keyframe paths of the previous batch. When the
            current batch holds a single file and this list is non-empty, the
            range starts at the last frame of the previous batch.

    Returns:
        ``(first_timestamp, last_timestamp, "first-last")``. An empty batch
        yields all-zero timestamps.
    """
    zero = "00:00:00,000"

    def to_srt_time(raw: str) -> str:
        """Convert ``[[HH:]MM:]SS[,mmm]`` text to ``HH:MM:SS,mmm``; zero on failure."""
        try:
            if len(raw) < 4:
                logger.warning(f"Invalid timestamp format: {raw}")
                return zero

            # Optional millisecond part after a comma.
            if ',' in raw:
                clock, millis_text = raw.split(',')
                millis = int(millis_text)
            else:
                clock, millis = raw, 0

            # Hours and minutes are optional; anything else is read as seconds.
            fields = clock.split(':')
            if len(fields) == 3:
                hours, minutes, seconds = map(int, fields)
            elif len(fields) == 2:
                hours = 0
                minutes, seconds = map(int, fields)
            else:
                hours, minutes, seconds = 0, 0, int(fields[0])

            # Carry overflowing seconds into minutes, then minutes into hours.
            if seconds >= 60:
                carry, seconds = divmod(seconds, 60)
                minutes += carry
            if minutes >= 60:
                carry, minutes = divmod(minutes, 60)
                hours += carry

            return f"{hours:02d}:{minutes:02d}:{seconds:02d},{millis:03d}"
        except Exception as e:
            logger.error(f"时间戳格式转换错误 {raw}: {str(e)}")
            return zero

    if not batch_files:
        logger.warning("Empty batch files")
        return zero, zero, "00:00:00,000-00:00:00,000"

    # A lone frame has no extent of its own, so start from the previous batch's
    # last frame when one is available.
    borrow_start = len(batch_files) == 1 and prev_batch_files and len(prev_batch_files) > 0
    start_path = prev_batch_files[-1] if borrow_start else batch_files[0]

    frame_names = [os.path.basename(start_path), os.path.basename(batch_files[-1])]
    raw_times = [name.split('_')[2].replace('.jpg', '') for name in frame_names]

    first_timestamp = to_srt_time(raw_times[0])
    last_timestamp = to_srt_time(raw_times[1])
    return first_timestamp, last_timestamp, f"{first_timestamp}-{last_timestamp}"

View File

@ -8,6 +8,8 @@ from faster_whisper import WhisperModel
from timeit import default_timer as timer
from loguru import logger
import google.generativeai as genai
from moviepy.editor import VideoFileClip
import os
from app.config import config
from app.utils import utils
@ -362,29 +364,86 @@ def create_with_gemini(audio_file: str, subtitle_file: str = "", api_key: Option
return None
def extract_audio_and_create_subtitle(video_file: str, subtitle_file: str = "") -> Optional[str]:
    """Extract the audio track of a video and generate a subtitle file from it.

    The audio is written to a temporary ``<video name>_audio.wav`` next to the
    video, handed to ``create`` and removed afterwards, whether or not
    subtitle generation succeeded.

    Args:
        video_file: Path of the source video file.
        subtitle_file: Output subtitle path. When empty, ``<video name>.srt``
            next to the video is used.

    Returns:
        The subtitle file path, or ``None`` if any step failed (the error and
        traceback are logged).
    """
    audio_file = ""
    try:
        video_dir = os.path.dirname(video_file)
        video_name = os.path.splitext(os.path.basename(video_file))[0]

        # Temporary audio file lives next to the video.
        audio_file = os.path.join(video_dir, f"{video_name}_audio.wav")

        # Derive the subtitle path from the video name when none was given.
        if not subtitle_file:
            subtitle_file = os.path.join(video_dir, f"{video_name}.srt")

        logger.info(f"开始从视频提取音频: {video_file}")
        video = VideoFileClip(video_file)
        try:
            # A silent video has no audio attribute to export; fail with a clear message.
            if video.audio is None:
                raise ValueError(f"视频文件不包含音轨: {video_file}")
            logger.info(f"正在提取音频到: {audio_file}")
            video.audio.write_audiofile(audio_file, codec='pcm_s16le')
        finally:
            # Always release the reader, even when extraction fails.
            video.close()

        logger.info("音频提取完成,开始生成字幕")
        create(audio_file, subtitle_file)
        return subtitle_file
    except Exception as e:
        logger.error(f"处理视频文件时出错: {str(e)}")
        logger.error(traceback.format_exc())
        return None
    finally:
        # Best-effort cleanup of the temporary audio on success and on failure.
        try:
            if audio_file and os.path.exists(audio_file):
                os.remove(audio_file)
                logger.info("已清理临时音频文件")
        except OSError as cleanup_err:
            logger.warning(f"清理临时音频文件失败: {str(cleanup_err)}")
if __name__ == "__main__":
task_id = "test456"
task_id = "123456"
task_dir = utils.task_dir(task_id)
subtitle_file = f"{task_dir}/subtitle.srt"
subtitle_file = f"{task_dir}/subtitle_123456.srt"
audio_file = f"{task_dir}/audio.wav"
video_file = "/Users/apple/Desktop/home/NarratoAI/resource/videos/merged_video_1702.mp4"
subtitles = file_to_subtitles(subtitle_file)
print(subtitles)
extract_audio_and_create_subtitle(video_file, subtitle_file)
# script_file = f"{task_dir}/script.json"
# with open(script_file, "r") as f:
# script_content = f.read()
# s = json.loads(script_content)
# script = s.get("script")
#
# correct(subtitle_file, script)
# subtitles = file_to_subtitles(subtitle_file)
# print(subtitles)
subtitle_file = f"{task_dir}/subtitle111.srt"
create(audio_file, subtitle_file)
# # script_file = f"{task_dir}/script.json"
# # with open(script_file, "r") as f:
# # script_content = f.read()
# # s = json.loads(script_content)
# # script = s.get("script")
# #
# # correct(subtitle_file, script)
# # 使用Gemini模型处理音频
# gemini_api_key = config.app.get("gemini_api_key") # 请替换为实际的API密钥
# gemini_subtitle_file = create_with_gemini(audio_file, api_key=gemini_api_key)
#
# if gemini_subtitle_file:
# print(f"Gemini生成的字幕文件: {gemini_subtitle_file}")
# subtitle_file = f"{task_dir}/subtitle111.srt"
# create(audio_file, subtitle_file)
# # # 使用Gemini模型处理音频
# # gemini_api_key = config.app.get("gemini_api_key") # 请替换为实际的API密钥
# # gemini_subtitle_file = create_with_gemini(audio_file, api_key=gemini_api_key)
# #
# # if gemini_subtitle_file:
# # print(f"Gemini生成的字幕文件: {gemini_subtitle_file}")

View File

@ -206,134 +206,14 @@ def generate_final_videos(
return final_video_paths, combined_video_paths
def start(task_id, params: VideoParams, stop_at: str = "video"):
    """Run the video generation pipeline for one task, optionally stopping early.

    The stages run in order: script, terms, audio, subtitle, materials, final
    videos. Task state and progress are reported through
    ``sm.state.update_task`` after each stage.

    Args:
        task_id: Identifier of the task whose state is updated.
        params: Generation parameters. ``video_concat_mode`` is converted to a
            ``VideoConcatMode`` in place when it arrives as a plain ``str``.
        stop_at: Stage after which to stop and return. Recognised values are
            ``"script"``, ``"terms"``, ``"audio"``, ``"subtitle"`` and
            ``"materials"``; any other value runs the whole pipeline.

    Returns:
        A dict with the outputs of the stage that was reached, or ``None``
        when a stage produced no result (the task is then marked failed).
    """
    # NOTE(review): block nesting was reconstructed from a flattened diff view;
    # confirm against the repository that save_script_data and the "terms" stop
    # are meant to run for local video sources as well.
    logger.info(f"start task: {task_id}, stop_at: {stop_at}")
    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=5)

    # Accept the concat mode as a raw string and normalise it to the enum.
    if type(params.video_concat_mode) is str:
        params.video_concat_mode = VideoConcatMode(params.video_concat_mode)

    # 1. Generate script
    video_script = generate_script(task_id, params)
    if not video_script:
        sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
        return

    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=10)

    if stop_at == "script":
        sm.state.update_task(
            task_id, state=const.TASK_STATE_COMPLETE, progress=100, script=video_script
        )
        return {"script": video_script}

    # 2. Generate terms
    # Terms are only generated when the video source is not "local".
    video_terms = ""
    if params.video_source != "local":
        video_terms = generate_terms(task_id, params, video_script)
        if not video_terms:
            sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
            return

    save_script_data(task_id, video_script, video_terms, params)

    if stop_at == "terms":
        sm.state.update_task(
            task_id, state=const.TASK_STATE_COMPLETE, progress=100, terms=video_terms
        )
        return {"script": video_script, "terms": video_terms}

    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=20)

    # 3. Generate audio
    audio_file, audio_duration, sub_maker = generate_audio(task_id, params, video_script)
    if not audio_file:
        sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
        return

    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=30)

    if stop_at == "audio":
        sm.state.update_task(
            task_id,
            state=const.TASK_STATE_COMPLETE,
            progress=100,
            audio_file=audio_file,
        )
        return {"audio_file": audio_file, "audio_duration": audio_duration}

    # 4. Generate subtitle
    # The subtitle result is not checked here; it is passed on as returned.
    subtitle_path = generate_subtitle(task_id, params, video_script, sub_maker, audio_file)

    if stop_at == "subtitle":
        sm.state.update_task(
            task_id,
            state=const.TASK_STATE_COMPLETE,
            progress=100,
            subtitle_path=subtitle_path,
        )
        return {"subtitle_path": subtitle_path}

    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=40)

    # 5. Get video materials
    downloaded_videos = get_video_materials(
        task_id, params, video_terms, audio_duration
    )
    if not downloaded_videos:
        sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
        return

    if stop_at == "materials":
        sm.state.update_task(
            task_id,
            state=const.TASK_STATE_COMPLETE,
            progress=100,
            materials=downloaded_videos,
        )
        return {"materials": downloaded_videos}

    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=50)

    # 6. Generate final videos
    final_video_paths, combined_video_paths = generate_final_videos(
        task_id, params, downloaded_videos, audio_file, subtitle_path
    )

    if not final_video_paths:
        sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
        return

    logger.success(
        f"task {task_id} finished, generated {len(final_video_paths)} videos."
    )

    # Everything produced along the way is stored on the task and returned.
    kwargs = {
        "videos": final_video_paths,
        "combined_videos": combined_video_paths,
        "script": video_script,
        "terms": video_terms,
        "audio_file": audio_file,
        "audio_duration": audio_duration,
        "subtitle_path": subtitle_path,
        "materials": downloaded_videos,
    }
    sm.state.update_task(
        task_id, state=const.TASK_STATE_COMPLETE, progress=100, **kwargs
    )
    return kwargs
def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: list):
"""
后台任务自动剪辑视频进行剪辑
task_id: 任务ID
params: 剪辑参数
subclip_path_videos: 视频文件路径
"""
def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: dict):
"""后台任务(自动剪辑视频进行剪辑)"""
logger.info(f"\n\n## 开始任务: {task_id}")
# 初始化 ImageMagick
if not utils.init_imagemagick():
logger.warning("ImageMagick 初始化失败,字幕可能无法正常显示")
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=5)
# tts 角色名称
@ -341,8 +221,7 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: li
logger.info("\n\n## 1. 加载视频脚本")
video_script_path = path.join(params.video_clip_json_path)
# video_script_path = video_clip_json_path
# 判断json文件是否存在
if path.exists(video_script_path):
try:
with open(video_script_path, "r", encoding="utf-8") as f:
@ -355,10 +234,12 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: li
logger.debug(f"解说完整脚本: \n{video_script}")
logger.debug(f"解说 OST 列表: \n{video_ost}")
logger.debug(f"解说时间戳列表: \n{time_list}")
# 获取视频总时长(单位 s)
total_duration = list_script[-1]['new_timestamp']
total_duration = int(total_duration.split("-")[1].split(":")[0]) * 60 + int(
total_duration.split("-")[1].split(":")[1])
last_timestamp = list_script[-1]['new_timestamp']
end_time = last_timestamp.split("-")[1]
total_duration = utils.time_to_seconds(end_time)
except Exception as e:
logger.error(f"无法读取视频json脚本请检查配置是否正确。{e}")
raise ValueError("无法读取视频json脚本请检查配置是否正确")
@ -366,32 +247,51 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: li
logger.error(f"video_script_path: {video_script_path} \n\n", traceback.format_exc())
raise ValueError("解说脚本不存在!请检查配置是否正确。")
logger.info("\n\n## 2. 生成音频列表")
audio_files, sub_maker_list = voice.tts_multiple(
task_id=task_id,
list_script=list_script,
voice_name=voice_name,
voice_rate=params.voice_rate,
voice_pitch=params.voice_pitch,
force_regenerate=True
logger.info("\n\n## 2. 根据OST设置生成音频列表")
# 只为OST=0或2的片段生成TTS音频
tts_segments = [
segment for segment in list_script
if segment['OST'] in [0, 2]
]
# logger.debug(f"tts_segments: {tts_segments}")
if tts_segments:
audio_files, sub_maker_list = voice.tts_multiple(
task_id=task_id,
list_script=tts_segments, # 只传入需要TTS的片段
voice_name=voice_name,
voice_rate=params.voice_rate,
voice_pitch=params.voice_pitch,
force_regenerate=True
)
if audio_files is None:
sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
logger.error("TTS转换音频失败, 可能是网络不可用! 如果您在中国, 请使用VPN.")
return
else:
audio_files = []
logger.info(f"合并音频文件:\n{audio_files}")
# 传入OST信息以便正确处理音频
final_audio = audio_merger.merge_audio_files(
task_id=task_id,
audio_files=audio_files,
total_duration=total_duration,
list_script=list_script # 传入完整脚本以便处理OST
)
if audio_files is None:
sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
logger.error(
"TTS转换音频失败, 可能是网络不可用! 如果您在中国, 请使用VPN.")
return
logger.info(f"合并音频:\n\n {audio_files}")
audio_file = audio_merger.merge_audio_files(task_id, audio_files, total_duration, list_script)
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=30)
# 只为OST=0或2的片段生成字幕
subtitle_path = ""
if params.subtitle_enabled:
subtitle_path = path.join(utils.task_dir(task_id), f"subtitle.srt")
subtitle_provider = config.app.get("subtitle_provider", "").strip().lower()
logger.info(f"\n\n## 3. 生成字幕、提供程序是: {subtitle_provider}")
# 使用 faster-whisper-large-v2 模型生成字幕
subtitle.create(audio_file=audio_file, subtitle_file=subtitle_path)
subtitle.create(
audio_file=final_audio,
subtitle_file=subtitle_path,
)
subtitle_lines = subtitle.file_to_subtitles(subtitle_path)
if not subtitle_lines:
@ -402,7 +302,7 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: li
logger.info("\n\n## 4. 裁剪视频")
subclip_videos = [x for x in subclip_path_videos.values()]
logger.debug(f"\n\n## 裁剪后的视频文件列表: \n{subclip_videos}")
# logger.debug(f"\n\n## 裁剪后的视频文件列表: \n{subclip_videos}")
if not subclip_videos:
sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
@ -434,14 +334,15 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: li
final_video_path = path.join(utils.task_dir(task_id), f"final-{index}.mp4")
logger.info(f"\n\n## 6. 最后一步: {index} => {final_video_path}")
# 把所有东西合到在一起
logger.info(f"\n\n## 6. 最后合成: {index} => {final_video_path}")
# 传入OST信息以便正确处理音频和视频
video.generate_video_v2(
video_path=combined_video_path,
audio_path=audio_file,
audio_path=final_audio,
subtitle_path=subtitle_path,
output_file=final_video_path,
params=params,
list_script=list_script # 传入完整脚本以便处理OST
)
_progress += 50 / 2

View File

@ -18,6 +18,15 @@ from app.utils import utils
def get_bgm_file(bgm_type: str = "random", bgm_file: str = ""):
"""
获取背景音乐文件路径
Args:
bgm_type: 背景音乐类型可选值: random(随机), ""(无背景音乐)
bgm_file: 指定的背景音乐文件路径
Returns:
str: 背景音乐文件路径
"""
if not bgm_type:
return ""
@ -48,21 +57,35 @@ def get_bgm_file(bgm_type: str = "random", bgm_file: str = ""):
def combine_videos(
combined_video_path: str,
video_paths: List[str],
audio_file: str,
video_aspect: VideoAspect = VideoAspect.portrait,
video_concat_mode: VideoConcatMode = VideoConcatMode.random,
max_clip_duration: int = 5,
threads: int = 2,
combined_video_path: str,
video_paths: List[str],
audio_file: str,
video_aspect: VideoAspect = VideoAspect.portrait,
video_concat_mode: VideoConcatMode = VideoConcatMode.random,
max_clip_duration: int = 5,
threads: int = 2,
) -> str:
"""
合并多个视频片段
Args:
combined_video_path: 合并后的视频保存路径
video_paths: 待合并的视频路径列表
audio_file: 音频文件路径
video_aspect: 视频宽高比
video_concat_mode: 视频拼接模式(随机/顺序)
max_clip_duration: 每个片段的最大时长()
threads: 处理线程数
Returns:
str: 合并后的视频路径
"""
audio_clip = AudioFileClip(audio_file)
audio_duration = audio_clip.duration
logger.info(f"max duration of audio: {audio_duration} seconds")
# Required duration of each clip
logger.info(f"音频时长: {audio_duration}")
# 每个片段的所需时长
req_dur = audio_duration / len(video_paths)
req_dur = max_clip_duration
logger.info(f"each clip will be maximum {req_dur} seconds long")
logger.info(f"每个片段最大时长: {req_dur}")
output_dir = os.path.dirname(combined_video_path)
aspect = VideoAspect(video_aspect)
@ -81,22 +104,22 @@ def combine_videos(
end_time = min(start_time + max_clip_duration, clip_duration)
split_clip = clip.subclip(start_time, end_time)
raw_clips.append(split_clip)
# logger.info(f"splitting from {start_time:.2f} to {end_time:.2f}, clip duration {clip_duration:.2f}, split_clip duration {split_clip.duration:.2f}")
# logger.info(f"从 {start_time:.2f} 到 {end_time:.2f}, 片段时长 {clip_duration:.2f}, 分割片段时长 {split_clip.duration:.2f}")
start_time = end_time
if video_concat_mode.value == VideoConcatMode.sequential.value:
break
# random video_paths order
# 随机视频片段顺序
if video_concat_mode.value == VideoConcatMode.random.value:
random.shuffle(raw_clips)
# Add downloaded clips over and over until the duration of the audio (max_duration) has been reached
# 添加下载的片段,直到音频时长(max_duration)达到
while video_duration < audio_duration:
for clip in raw_clips:
# Check if clip is longer than the remaining audio
# 检查片段是否比剩余音频时长长
if (audio_duration - video_duration) < clip.duration:
clip = clip.subclip(0, (audio_duration - video_duration))
# Only shorten clips if the calculated clip length (req_dur) is shorter than the actual clip to prevent still image
# 仅当计算的片段时长(req_dur)小于实际片段时长时,缩短片段
elif req_dur < clip.duration:
clip = clip.subclip(0, req_dur)
clip = clip.set_fps(30)
@ -134,7 +157,7 @@ def combine_videos(
)
logger.info(
f"resizing video to {video_width} x {video_height}, clip size: {clip_w} x {clip_h}"
f"调整视频尺寸为 {video_width} x {video_height}, 片段尺寸: {clip_w} x {clip_h}"
)
if clip.duration > max_clip_duration:
@ -146,7 +169,7 @@ def combine_videos(
video_clip = concatenate_videoclips(clips)
video_clip = video_clip.set_fps(30)
logger.info("writing")
# https://github.com/harry0703/NarratoAI/issues/111#issuecomment-2032354030
video_clip.write_videofile(
filename=combined_video_path,
threads=threads,
@ -161,6 +184,17 @@ def combine_videos(
def wrap_text(text, max_width, font, fontsize=60):
"""
文本自动换行处理
Args:
text: 待处理的文本
max_width: 最大宽度
font: 字体文件路径
fontsize: 字体大小
Returns:
tuple: (换行后的文本, 文本高度)
"""
# 创建字体对象
font = ImageFont.truetype(font, fontsize)
@ -220,6 +254,14 @@ def wrap_text(text, max_width, font, fontsize=60):
@contextmanager
def manage_clip(clip):
"""
视频片段资源管理器
Args:
clip: 视频片段对象
Yields:
VideoFileClip: 视频片段对象
"""
try:
yield clip
finally:
@ -232,6 +274,7 @@ def generate_video_v2(
audio_path: str,
subtitle_path: str,
output_file: str,
list_script: list,
params: Union[VideoParams, VideoClipParams],
progress_callback=None,
):
@ -250,7 +293,7 @@ def generate_video_v2(
"""
total_steps = 4
current_step = 0
def update_progress(step_name):
nonlocal current_step
current_step += 1
@ -260,7 +303,7 @@ def generate_video_v2(
try:
validate_params(video_path, audio_path, output_file, params)
with manage_clip(VideoFileClip(video_path)) as video_clip:
aspect = VideoAspect(params.video_aspect)
video_width, video_height = aspect.to_resolution()
@ -304,7 +347,7 @@ def generate_video_v2(
_clip = _clip.set_start(subtitle_item[0][0])
_clip = _clip.set_end(subtitle_item[0][1])
_clip = _clip.set_duration(duration)
if params.subtitle_position == "bottom":
_clip = _clip.set_position(("center", video_height * 0.95 - _clip.h))
elif params.subtitle_position == "top":
@ -335,6 +378,7 @@ def generate_video_v2(
update_progress("字幕处理完成")
# 合并音频和导出
logger.info("开始导出视频 (此步骤耗时较长请耐心等待)")
video_clip = video_clip.set_audio(final_audio)
video_clip.write_videofile(
output_file,
@ -344,7 +388,7 @@ def generate_video_v2(
logger=None,
fps=30,
)
except FileNotFoundError as e:
logger.error(f"文件不存在: {str(e)}")
raise
@ -356,15 +400,25 @@ def generate_video_v2(
def process_audio_tracks(original_audio, new_audio, params, video_duration):
"""处理所有音轨"""
"""
处理所有音轨(原声配音背景音乐)
Args:
original_audio: 原始音频
new_audio: 新音频
params: 视频参数
video_duration: 视频时长
Returns:
CompositeAudioClip: 合成后的音频
"""
audio_tracks = []
if original_audio is not None:
audio_tracks.append(original_audio)
new_audio = new_audio.volumex(params.voice_volume)
audio_tracks.append(new_audio)
# 处理背景音乐
bgm_file = get_bgm_file(bgm_type=params.bgm_type, bgm_file=params.bgm_file)
if bgm_file:
@ -374,35 +428,54 @@ def process_audio_tracks(original_audio, new_audio, params, video_duration):
audio_tracks.append(bgm_clip)
except Exception as e:
logger.error(f"添加背景音乐失败: {str(e)}")
return CompositeAudioClip(audio_tracks) if audio_tracks else new_audio
def process_subtitles(subtitle_path, video_clip, video_duration, create_text_clip):
    """Overlay the subtitles from ``subtitle_path`` onto ``video_clip``.

    Args:
        subtitle_path: Path of the subtitle file; may be empty.
        video_clip: The clip the subtitles are drawn over.
        video_duration: Upper bound used to clamp subtitle times; subtitles
            starting at or after it are dropped.
        create_text_clip: Callback invoked as ``create_text_clip(subtitle_item=item)``
            that builds the text clip for one subtitle entry.

    Returns:
        ``video_clip`` unchanged when the subtitle path is empty or does not
        exist; otherwise a ``CompositeVideoClip`` of the video plus the
        subtitle clips.
    """
    if not subtitle_path or not os.path.exists(subtitle_path):
        return video_clip

    parsed = SubtitlesClip(subtitles=subtitle_path, encoding="utf-8")
    layers = [video_clip]

    for entry in parsed.subtitles:
        caption = create_text_clip(subtitle_item=entry)

        # Clamp the caption into [0, video_duration]; skip it when it would
        # start after the video is over.
        begin = max(caption.start, 0)
        if begin < video_duration:
            finish = min(caption.end, video_duration)
            layers.append(caption.set_start(begin).set_end(finish))

    logger.info(f"处理了 {len(layers) - 1} 段字幕")
    return CompositeVideoClip(layers)
def preprocess_video(materials: List[MaterialInfo], clip_duration=4):
"""
预处理视频素材
Args:
materials: 素材信息列表
clip_duration: 片段时长()
Returns:
List[MaterialInfo]: 处理后的素材信息列表
"""
for material in materials:
if not material.url:
continue
@ -430,12 +503,12 @@ def preprocess_video(materials: List[MaterialInfo], clip_duration=4):
# 使用resize方法来添加缩放效果。这里使用了lambda函数来使得缩放效果随时间变化。
# 假设我们想要从原始大小逐渐放大到120%的大小。
# t代表当前时间clip.duration为视频总时长这里是3秒。
# 注意1 表示100%的大小所以1.2表示120%的大小
# 注意1 表示100%的大小所以1.2表示120%的大小
zoom_clip = clip.resize(
lambda t: 1 + (clip_duration * 0.03) * (t / clip.duration)
)
# 如果需要,可以创建一个包含缩放剪辑的复合频剪辑
# 如果需要,可以创建一个包含缩放剪辑的复合频剪辑
# (这在您想要在视频中添加其他元素时非常有用)
final_clip = CompositeVideoClip([zoom_clip])
@ -472,7 +545,7 @@ def combine_clip_videos(combined_video_path: str,
from app.utils.utils import calculate_total_duration
audio_duration = calculate_total_duration(list_script)
logger.info(f"音频的最大持续时间: {audio_duration} s")
output_dir = os.path.dirname(combined_video_path)
aspect = VideoAspect(video_aspect)
video_width, video_height = aspect.to_resolution()
@ -481,25 +554,25 @@ def combine_clip_videos(combined_video_path: str,
for video_path, video_ost in zip(video_paths, video_ost_list):
try:
clip = VideoFileClip(video_path)
if video_ost == 0: # 不保留原声
clip = clip.without_audio()
# video_ost 为 1 或 2 时都保留原声,不需要特殊处理
clip = clip.set_fps(30)
# 处理视频尺寸
clip_w, clip_h = clip.size
if clip_w != video_width or clip_h != video_height:
clip = resize_video_with_padding(
clip,
target_width=video_width,
clip,
target_width=video_width,
target_height=video_height
)
logger.info(f"视频 {video_path} 已调整尺寸为 {video_width} x {video_height}")
clips.append(clip)
except Exception as e:
logger.error(f"处理视频 {video_path} 时出错: {str(e)}")
continue
@ -510,8 +583,8 @@ def combine_clip_videos(combined_video_path: str,
try:
video_clip = concatenate_videoclips(clips)
video_clip = video_clip.set_fps(30)
logger.info("开始合并视频...")
logger.info("开始合并视频... (过程中出现 UserWarning: 不必理会)")
video_clip.write_videofile(
filename=combined_video_path,
threads=threads,
@ -521,7 +594,7 @@ def combine_clip_videos(combined_video_path: str,
temp_audiofile=os.path.join(output_dir, "temp-audio.m4a")
)
finally:
# 确保资源被正确释放
# 确保资源被正确释放
video_clip.close()
for clip in clips:
clip.close()
@ -531,13 +604,22 @@ def combine_clip_videos(combined_video_path: str,
def resize_video_with_padding(clip, target_width: int, target_height: int):
"""辅助函数:调整视频尺寸并添加黑边"""
"""
调整视频尺寸并添加黑边
Args:
clip: 视频片段
target_width: 目标宽度
target_height: 目标高度
Returns:
CompositeVideoClip: 调整尺寸后的视频
"""
clip_ratio = clip.w / clip.h
target_ratio = target_width / target_height
if clip_ratio == target_ratio:
return clip.resize((target_width, target_height))
if clip_ratio > target_ratio:
scale_factor = target_width / clip.w
else:
@ -548,10 +630,10 @@ def resize_video_with_padding(clip, target_width: int, target_height: int):
clip_resized = clip.resize(newsize=(new_width, new_height))
background = ColorClip(
size=(target_width, target_height),
size=(target_width, target_height),
color=(0, 0, 0)
).set_duration(clip.duration)
return CompositeVideoClip([
background,
clip_resized.set_position("center")
@ -559,106 +641,100 @@ def resize_video_with_padding(clip, target_width: int, target_height: int):
def validate_params(video_path, audio_path, output_file, params):
    """Validate the inputs of a video generation call.

    Args:
        video_path: Path of the input video file; must exist.
        audio_path: Path of the input audio file; must exist.
        output_file: Path the result will be written to; its directory must
            exist. A bare file name refers to the current directory.
        params: Video parameters; must expose a ``video_aspect`` attribute.

    Raises:
        FileNotFoundError: If the video, the audio or the output directory is missing.
        ValueError: If ``params`` has no ``video_aspect`` attribute.
    """
    if not os.path.exists(video_path):
        raise FileNotFoundError(f"视频文件不存在: {video_path}")

    if not os.path.exists(audio_path):
        raise FileNotFoundError(f"音频文件不存在: {audio_path}")

    # dirname() is "" for a bare file name, and exists("") is always False;
    # treat that case as the current directory instead of rejecting it.
    output_dir = os.path.dirname(output_file) or "."
    if not os.path.exists(output_dir):
        raise FileNotFoundError(f"输出目录不存在: {output_dir}")

    if not hasattr(params, 'video_aspect'):
        raise ValueError("params 缺少必要参数 video_aspect")
if __name__ == "__main__":
# combined_video_path = "../../storage/tasks/12312312/com123.mp4"
#
# video_paths = ['../../storage/cache_videos/vid-00_00-00_03.mp4',
# '../../storage/cache_videos/vid-00_03-00_07.mp4',
# '../../storage/cache_videos/vid-00_12-00_17.mp4',
# '../../storage/cache_videos/vid-00_26-00_31.mp4']
# video_ost_list = [False, True, False, True]
# list_script = [
# {
# "picture": "夜晚,一个小孩在树林里奔跑,后面有人拿着火把在追赶",
# "timestamp": "00:00-00:03",
# "narration": "夜黑风高的树林,一个小孩在拼命奔跑,后面的人穷追不舍!",
# "OST": False,
# "new_timestamp": "00:00-00:03"
# },
# {
# "picture": "追赶的人命令抓住小孩",
# "timestamp": "00:03-00:07",
# "narration": "原声播放1",
# "OST": True,
# "new_timestamp": "00:03-00:07"
# },
# {
# "picture": "小孩躲在草丛里,黑衣人用脚踢了踢他",
# "timestamp": "00:12-00:17",
# "narration": "小孩脱下外套,跑进树林, 一路奔跑,直到第二天清晨",
# "OST": False,
# "new_timestamp": "00:07-00:12"
# },
# {
# "picture": "小孩跑到车前,慌慌张张地对女人说有人要杀他",
# "timestamp": "00:26-00:31",
# "narration": "原声播放2",
# "OST": True,
# "new_timestamp": "00:12-00:17"
# }
# ]
combined_video_path = "../../storage/tasks/123/combined.mp4"
video_paths = ['../../storage/temp/clip_video/0b545e689a182a91af2163c7c0ca7ca3/vid-00-00-10_000-00-00-43_039.mp4',
'../../storage/temp/clip_video/0b545e689a182a91af2163c7c0ca7ca3/vid-00-00-45_439-00-01-01_600.mp4',
'../../storage/temp/clip_video/0b545e689a182a91af2163c7c0ca7ca3/vid-00-01-07_920-00-01-25_719.mp4',
'../../storage/temp/clip_video/0b545e689a182a91af2163c7c0ca7ca3/vid-00-01-36_959-00-01-53_719.mp4']
video_ost_list = [2, 2, 2, 2]
list_script = [
{
"timestamp": "00:10-00:43",
"picture": "好的,以下是视频画面的客观描述:\n\n视频显示一个男人在一个树木繁茂的地区靠近一个泥土斜坡他穿着一件深色T恤、卡其色长裤和登山靴。他背着一个军绿色背包里面似乎装有头和其他工具。\n\n第一个镜头显示该男子从远处走近斜坡,背对着镜头。下一个镜头特写显示了的背包,一个镐头从背包中伸出来。下一个镜头显示该男子用镐头敲打斜坡。下一个镜头是该男子脚上的特写镜头,他穿着登山靴,正站在泥土斜坡上。最后一个镜显示该男子在斜坡上,仔细地拨开树根和泥土。周围的环境是树木繁茂的,阳光透过树叶照射下来。土壤是浅棕色的,斜坡上有许多树根和植被。",
"narration": "(接上文)好吧,今天我们的男主角,背着一个看似随时要发射军绿色背包,竟然化身“泥土探险家”,在斜坡上挥舞着镐头!他这是准备挖宝还是给树根做个“美容”?阳光洒下来,简直是自然界的聚光灯,仿佛在说:“快来看看,这位勇士要挑战泥土极限!”我只能默默想,如果树根能说话,它们一定会喊:“别打我,我还有家人!”这就是生活,总有些搞笑的瞬间等着我们去发现!",
"OST": 2,
"new_timestamp": "00:00:00,000-00:00:33,000"
},
{
"timestamp": "00:45-01:01",
"picture": "好的以下是视频画面的客观描述:\n\n视频显示了一个人在森林里挖掘。\n\n第一个镜头是地面特写显示出松<EFBFBD><EFBFBD>的泥土、碎石和落叶。光线照在部分区域。\n\n第二个镜头中,一模糊不清的蹲一个树根旁挖掘,一个橄榄绿色的背包放在地上。树根缠绕着常春藤。\n\n第三个镜头显示该人在一个更开阔的区域挖掘,那里有一些树根,以及部分倒的树干。他起来像是在挖掘一个较大的坑。\n\n第四个镜头是特写镜头,显示该人用工具清理土坑的墙壁。\n\n第五个镜头是土坑内部的特写镜头,可以看到土质的纹理,有一些小树根和它植被的残留物。",
"narration": "现在,这位勇敢的挖掘者就像个“现代版的土豆农夫”,在林里开辟新天地。的目标是什么?挖一个宝藏还块“树根披萨”?小心哦,别让树根追着你喊:“不要挖我,我也是有故事的!”",
"OST": 2,
"new_timestamp": "00:00:33,000-00:00:49,000"
},
{
"timestamp": "01:07-01:25",
"picture": "好,以下是视频画面的客观描述:\n\n画面1特写镜头显示出一丛带有水珠的深绿色灌木叶片。叶片呈椭圆形边缘光滑。背景是树根和泥土。\n\n画面2一个留着胡子的男人正在一个森林中土坑里挖掘。他穿着黑色T恤和卡其色裤子跪在地用具挖掘泥土。周围环绕着树木、树根和灌木。一个倒下的树干横跨土坑上方。\n\n画面3同一个男人坐在他刚才挖的坑的边缘看着前方。他的表情似乎略带沉思。背景与画面2相同。\n\n画面4一个广角镜头显示出他挖出的坑。这是一个不规则形状的土坑在树木繁茂的斜坡上。土壤呈深棕色可见树根。\n\n画面5同一个男人跪在地上用一把小斧头砍一根木头。他穿着与前几个画面相同的衣服。地面上覆盖着落叶。周围是树木和灌木。",
"narration": "“哎呀,这片灌木叶子滴水如雨,感觉像是大自然的洗发水广告!但我这位‘挖宝达人’似乎更适合拍个‘森林里的单身狗’真人秀。等会儿,我要给树根唱首歌,听说它们爱音乐!”",
"OST": 2,
"new_timestamp": "00:00:49,000-00:01:07,000"
},
{
"timestamp": "01:36-01:53",
"picture": "好的,以下是视频画面内容的客观描述:\n\n视频包含三个镜头:\n\n**镜头一:**个小型、浅水池塘,位于树林中。池塘的水看起来浑浊,呈绿褐色。池塘周围遍布泥土和落叶。多根树枝和树干横跨池塘,部分浸没在水中。周围的植被茂密主要是深色树木和灌木。\n\n**镜头二:**距拍摄树深处,阳光透过树叶洒落在植被上。镜头中可见粗大的树干、树枝和各种绿叶植物。部分树枝似乎被砍断,切口可见。\n\n**镜头三:**近距离特写镜头,聚焦在树枝和绿叶上。叶片呈圆形,颜色为鲜绿色,有些叶片上有缺损。树枝颜色较深,呈现深褐色。背景是模糊的树林。\n",
"narration": "“好吧,看来我们的‘挖宝达人’终于找到了一‘宝藏’——一个色泽如同绿豆汤的池塘!我敢打赌,这里不仅是小鱼儿的游乐场更是树枝们的‘水疗中心’!下次来这里,我得带上浮潜装备!”",
"OST": 2,
"new_timestamp": "00:01:07,000-00:01:24,000"
}
]
# 合并子视频
# combine_clip_videos(combined_video_path=combined_video_path, video_paths=video_paths, video_ost_list=video_ost_list, list_script=list_script)
# cfg = VideoClipParams()
# cfg.video_aspect = VideoAspect.portrait
# cfg.font_name = "STHeitiMedium.ttc"
# cfg.font_size = 60
# cfg.stroke_color = "#000000"
# cfg.stroke_width = 1.5
# cfg.text_fore_color = "#FFFFFF"
# cfg.text_background_color = "transparent"
# cfg.bgm_type = "random"
# cfg.bgm_file = ""
# cfg.bgm_volume = 1.0
# cfg.subtitle_enabled = True
# cfg.subtitle_position = "bottom"
# cfg.n_threads = 2
# cfg.paragraph_number = 1
#
# cfg.voice_volume = 1.0
cfg = VideoClipParams()
cfg.video_aspect = VideoAspect.portrait
cfg.font_name = "STHeitiMedium.ttc"
cfg.font_size = 60
cfg.stroke_color = "#000000"
cfg.stroke_width = 1.5
cfg.text_fore_color = "#FFFFFF"
cfg.text_background_color = "transparent"
cfg.bgm_type = "random"
cfg.bgm_file = ""
cfg.bgm_volume = 1.0
cfg.subtitle_enabled = True
cfg.subtitle_position = "bottom"
cfg.n_threads = 2
cfg.video_volume = 1
# generate_video(video_path=video_file,
# audio_path=audio_file,
# subtitle_path=subtitle_file,
# output_file=output_file,
# params=cfg
# )
#
# video_path = "../../storage/tasks/7f5ae494-abce-43cf-8f4f-4be43320eafa/combined-1.mp4"
#
# audio_path = "../../storage/tasks/7f5ae494-abce-43cf-8f4f-4be43320eafa/audio_00-00-00-07.mp3"
#
# subtitle_path = "../../storage/tasks/7f5ae494-abce-43cf-8f4f-4be43320eafa\subtitle.srt"
#
# output_file = "../../storage/tasks/7f5ae494-abce-43cf-8f4f-4be43320eafa/final-123.mp4"
#
# generate_video_v2(video_path=video_path,
# audio_path=audio_path,
# subtitle_path=subtitle_path,
# output_file=output_file,
# params=cfg
# )
cfg.voice_volume = 1.0
# 合并视频
video_list = [
'./storage/cache_videos/vid-01_03-01_50.mp4',
'./storage/cache_videos/vid-01_55-02_29.mp4',
'./storage/cache_videos/vid-03_24-04_04.mp4',
'./storage/cache_videos/vid-04_50-05_28.mp4'
]
video_path = "../../storage/tasks/123/combined.mp4"
audio_path = "../../storage/tasks/123/final_audio.mp3"
subtitle_path = "../../storage/tasks/123/subtitle.srt"
output_file = "../../storage/tasks/123/final-123.mp4"
generate_video_v2(video_path=video_path,
audio_path=audio_path,
subtitle_path=subtitle_path,
output_file=output_file,
params=cfg,
list_script=list_script,
)

View File

@ -0,0 +1,58 @@
import os
from uuid import uuid4
from loguru import logger
from typing import Dict, List, Optional, Tuple
from app.services import material
from app.models.schema import VideoClipParams
from app.utils import utils
class VideoService:
    """Service wrapper around the video clipping helpers in ``material``."""

    @staticmethod
    async def crop_video(
            video_path: str,
            video_script: List[dict]
    ) -> Tuple[str, Dict[str, str]]:
        """Cut the source video into the clips named by the script timestamps.

        A fresh task id is generated for the run. Each scene in
        ``video_script`` gets a ``'path'`` entry pointing at its clip; a scene
        whose timestamp is missing from the result is logged and left as is.

        Args:
            video_path: Path of the source video.
            video_script: Scene dicts, each carrying a ``'timestamp'`` entry.

        Returns:
            ``(task_id, clips)`` where ``clips`` is the value returned by
            ``material.clip_videos``, indexed by timestamp.

        Raises:
            ValueError: If ``material.clip_videos`` returns ``None``.
            Exception: Any other failure is logged and re-raised unchanged.
        """
        try:
            task_id = str(uuid4())

            # Collect the timestamps to cut, in script order.
            timestamps = []
            for scene in video_script:
                timestamps.append(scene['timestamp'])

            clips = material.clip_videos(
                task_id=task_id,
                timestamp_terms=timestamps,
                origin_video=video_path
            )
            if clips is None:
                raise ValueError("裁剪视频失败")

            # Attach the resulting clip path to every scene of the script.
            for scene in video_script:
                try:
                    scene['path'] = clips[scene['timestamp']]
                except KeyError as err:
                    logger.error(f"更新视频路径失败: {err}")

            logger.debug(f"裁剪视频成功,共生成 {len(timestamps)} 个视频片段")
            logger.debug(f"视频片段路径: {clips}")
            return task_id, clips
        except Exception:
            logger.exception("裁剪视频失败")
            raise

View File

@ -11,6 +11,7 @@ from edge_tts.submaker import mktimestamp
from xml.sax.saxutils import unescape
from edge_tts import submaker, SubMaker
from moviepy.video.tools import subtitles
import time
from app.config import config
from app.utils import utils
@ -989,6 +990,9 @@ Gender: Female
Name: zh-CN-XiaoxiaoMultilingualNeural-V2
Gender: Female
Name: zh-CN-YunxiNeural-V2
Gender: Male
""".strip()
voices = []
name = ""
@ -1034,8 +1038,8 @@ def is_azure_v2_voice(voice_name: str):
def tts(
    text: str, voice_name: str, voice_rate: float, voice_pitch: float, voice_file: str
) -> [SubMaker, None]:
    """Synthesize ``text`` to ``voice_file`` with the backend matching the voice.

    Voices for which ``is_azure_v2_voice`` is truthy are handled by
    ``azure_tts_v2`` (which takes no rate or pitch); all others go to
    ``azure_tts_v1``.

    Returns:
        Whatever the selected backend returns.
    """
    if not is_azure_v2_voice(voice_name):
        return azure_tts_v1(text, voice_name, voice_rate, voice_pitch, voice_file)
    return azure_tts_v2(text, voice_name, voice_file)
@ -1068,33 +1072,47 @@ def azure_tts_v1(
pitch_str = convert_pitch_to_percent(voice_pitch)
for i in range(3):
try:
logger.info(f"start, voice name: {voice_name}, try: {i + 1}")
logger.info(f"{i+1} 次使用 edge_tts 生成音频")
async def _do() -> SubMaker:
async def _do() -> tuple[SubMaker, bytes]:
communicate = edge_tts.Communicate(text, voice_name, rate=rate_str, pitch=pitch_str, proxy=config.proxy.get("http"))
sub_maker = edge_tts.SubMaker()
with open(voice_file, "wb") as file:
async for chunk in communicate.stream():
if chunk["type"] == "audio":
file.write(chunk["data"])
elif chunk["type"] == "WordBoundary":
sub_maker.create_sub(
(chunk["offset"], chunk["duration"]), chunk["text"]
)
return sub_maker
# 判断音频文件是否一件存在
audio_data = bytes() # 用于存储音频数据
async for chunk in communicate.stream():
if chunk["type"] == "audio":
audio_data += chunk["data"]
elif chunk["type"] == "WordBoundary":
sub_maker.create_sub(
(chunk["offset"], chunk["duration"]), chunk["text"]
)
return sub_maker, audio_data
# 判断音频文件是否已存在
if os.path.exists(voice_file):
logger.info(f"voice file exists, skip tts: {voice_file}")
continue
sub_maker = asyncio.run(_do())
if not sub_maker or not sub_maker.subs:
logger.warning(f"failed, sub_maker is None or sub_maker.subs is None")
# 获取音频数据和字幕信息
sub_maker, audio_data = asyncio.run(_do())
# 验证数据是否有效
if not sub_maker or not sub_maker.subs or not audio_data:
logger.warning(f"failed, invalid data generated")
if i < 2:
time.sleep(1)
continue
# 数据有效,写入文件
with open(voice_file, "wb") as file:
file.write(audio_data)
logger.info(f"completed, output file: {voice_file}")
return sub_maker
except Exception as e:
logger.error(f"failed, error: {str(e)}")
logger.error(f"生成音频文件时出错: {str(e)}")
if i < 2:
time.sleep(1)
return None
@ -1130,14 +1148,6 @@ def azure_tts_v2(text: str, voice_name: str, voice_file: str) -> [SubMaker, None
sub_maker = SubMaker()
def speech_synthesizer_word_boundary_cb(evt: speechsdk.SessionEventArgs):
# print('WordBoundary event:')
# print('\tBoundaryType: {}'.format(evt.boundary_type))
# print('\tAudioOffset: {}ms'.format((evt.audio_offset + 5000)))
# print('\tDuration: {}'.format(evt.duration))
# print('\tText: {}'.format(evt.text))
# print('\tTextOffset: {}'.format(evt.text_offset))
# print('\tWordLength: {}'.format(evt.word_length))
duration = _format_duration_to_offset(str(evt.duration))
offset = _format_duration_to_offset(evt.audio_offset)
sub_maker.subs.append(evt.text)
@ -1183,9 +1193,13 @@ def azure_tts_v2(text: str, voice_name: str, voice_file: str) -> [SubMaker, None
logger.error(
f"azure v2 speech synthesis error: {cancellation_details.error_details}"
)
if i < 2: # 如果不是最后一次重试则等待1秒
time.sleep(1)
logger.info(f"completed, output file: {voice_file}")
except Exception as e:
logger.error(f"failed, error: {str(e)}")
if i < 2: # 如果不是最后一次重试则等待1秒
time.sleep(1)
return None
@ -1443,7 +1457,7 @@ def tts_multiple(task_id: str, list_script: list, voice_name: str, voice_rate: f
if sub_maker is None:
logger.error(f"无法为时间戳 {timestamp} 生成音频; "
f"如果您在中国请使用VPN。或者手动选择 zh-CN-YunyangNeural 等角色;"
f"如果您在中国请使用VPN; "
f"或者使用其他 tts 引擎")
continue
@ -1460,17 +1474,12 @@ if __name__ == "__main__":
voice_name = parse_voice_name(voice_name)
print(voice_name)
with open("../../resource/scripts/test.json", 'r', encoding='utf-8') as f:
with open("../../resource/scripts/2024-1203-205442.json", 'r', encoding='utf-8') as f:
data = json.load(f)
audio_files, sub_maker_list = tts_multiple(task_id="12312312", list_script=data, voice_name=voice_name, voice_rate=1)
audio_files, sub_maker_list = tts_multiple(task_id="12312312", list_script=data, voice_name=voice_name, voice_rate=1, voice_pitch=1)
full_text = " ".join([item['narration'] for item in data if not item['OST']])
subtitle_file = os.path.join(utils.task_dir("12312312"), "subtitle_multiple.srt")
create_subtitle_from_multiple(full_text, sub_maker_list, data, subtitle_file)
print(f"生成的音频文件列表: {audio_files}")
print(f"生成的字幕文件: {subtitle_file}")
# text = " ".join([item['narration'] for item in data])
# sub_marks = tts(text=text, voice_name=voice_name, voice_rate=1, voice_file="../../storage/tasks/12312312/aaa.mp3")
# create_subtitle(text=text, sub_maker=sub_marks, subtitle_file="../../storage/tasks/12312312/subtitle_123.srt")

View File

@ -0,0 +1,146 @@
import yt_dlp
import os
from typing import List, Dict, Optional, Tuple
from loguru import logger
from uuid import uuid4
from app.utils import utils
from app.services import video as VideoService
class YoutubeService:
    """Download YouTube videos at a requested resolution using yt-dlp."""

    def __init__(self):
        # Container formats accepted for the ``output_format`` argument.
        self.supported_formats = ['mp4', 'mkv', 'webm', 'flv', 'avi']

    @staticmethod
    def _normalize_resolution(resolution) -> Optional[str]:
        """Reduce a resolution label such as '1080p60' to its base form '1080p'.

        Returns:
            The normalized label, or None when the label is missing
            (None, empty string or the 'N/A' placeholder).
        """
        if not resolution or resolution == 'N/A':
            return None
        return str(resolution).split('p')[0] + 'p'

    def _get_video_formats(self, url: str) -> List[Dict]:
        """Return the list of formats yt-dlp reports for ``url``.

        Each entry is a dict with the keys ``format_id``, ``ext``,
        ``resolution`` (taken from yt-dlp's ``format_note``), ``filesize``,
        ``vcodec`` and ``acodec``.

        Raises:
            Exception: whatever yt-dlp raises; it is logged and re-raised.
        """
        ydl_opts = {
            'quiet': True,
            'no_warnings': True
        }
        try:
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info = ydl.extract_info(url, download=False)
            return [
                {
                    'format_id': f.get('format_id', 'N/A'),
                    'ext': f.get('ext', 'N/A'),
                    # The key may be present with a None value, so `or` is
                    # used instead of a .get() default.
                    'resolution': f.get('format_note') or 'N/A',
                    'filesize': f.get('filesize', 'N/A'),
                    'vcodec': f.get('vcodec', 'N/A'),
                    'acodec': f.get('acodec', 'N/A')
                }
                for f in (info.get('formats') or [])
            ]
        except Exception as e:
            logger.error(f"获取视频格式失败: {str(e)}")
            raise

    def _validate_format(self, output_format: str) -> None:
        """Check that ``output_format`` is supported (case-insensitive).

        Raises:
            ValueError: if the format is not in ``self.supported_formats``.
        """
        if output_format.lower() not in self.supported_formats:
            raise ValueError(
                f"不支持的视频格式: {output_format}。"
                f"支持的格式: {', '.join(self.supported_formats)}"
            )

    async def download_video(
        self,
        url: str,
        resolution: str,
        output_format: str = 'mp4',
        rename: Optional[str] = None
    ) -> Tuple[str, str, str]:
        """Download a video at the requested resolution.

        Args:
            url: YouTube video URL.
            resolution: Target resolution ('2160p', '1440p', '1080p', '720p', ...).
                Inputs such as '1080p60' are treated as '1080p'.
            output_format: Container format of the output file.
            rename: Optional base name (without extension) for the output file.

        Returns:
            Tuple[str, str, str]: (task_id, output_path, filename)

        Raises:
            ValueError: if the format is unsupported or no video stream with
                the requested resolution exists.
            Exception: download errors from yt-dlp are logged and re-raised.
        """
        try:
            task_id = str(uuid4())
            self._validate_format(output_format)

            # Normalize the requested resolution ('1080p60' -> '1080p').
            base_resolution = resolution.split('p')[0] + 'p'

            formats = self._get_video_formats(url)

            # First format that carries video and matches the resolution.
            target_format = next(
                (
                    fmt for fmt in formats
                    if fmt['vcodec'] != 'none'
                    and self._normalize_resolution(fmt['resolution']) == base_resolution
                ),
                None,
            )

            if target_format is None:
                available_resolutions = {
                    normalized
                    for normalized in (
                        self._normalize_resolution(fmt['resolution'])
                        for fmt in formats
                        if fmt['vcodec'] != 'none'
                    )
                    if normalized is not None
                }
                raise ValueError(
                    f"未找到 {base_resolution} 分辨率的视频。"
                    f"可用分辨率: {', '.join(sorted(available_resolutions))}"
                )

            output_dir = utils.video_dir()
            os.makedirs(output_dir, exist_ok=True)

            if rename:
                # Caller-supplied name is used verbatim.
                filename = f"{rename}.{output_format}"
                output_template = os.path.join(output_dir, filename)
            else:
                # Otherwise: task id plus the original title.
                output_template = os.path.join(output_dir, f'{task_id}_%(title)s.%(ext)s')

            ydl_opts = {
                'format': f"{target_format['format_id']}+bestaudio[ext=m4a]/best",
                'outtmpl': output_template,
                'merge_output_format': output_format.lower(),
                'postprocessors': [{
                    'key': 'FFmpegVideoConvertor',
                    'preferedformat': output_format.lower(),
                }]
            }

            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info = ydl.extract_info(url, download=True)
                if rename:
                    output_path = output_template
                else:
                    # yt-dlp sanitizes the title when it expands the template,
                    # so ask it for the real name instead of rebuilding it from
                    # the raw title; the extension is the converted format.
                    prepared = ydl.prepare_filename(info)
                    output_path = os.path.splitext(prepared)[0] + '.' + output_format.lower()

            filename = os.path.basename(output_path)
            logger.info(f"视频下载成功: {output_path}")
            return task_id, output_path, filename
        except Exception:
            logger.exception("下载视频失败")
            raise

View File

@ -1,21 +1,32 @@
"""
使用 moviepy 库剪辑指定时间戳视频
使用 moviepy 库剪辑指定时间戳视频支持时分秒毫秒精度
"""
from moviepy.editor import VideoFileClip
from datetime import datetime
import os
def time_str_to_seconds(time_str: str) -> float:
"""
将时间字符串转换为秒数
参数:
time_str: 格式为"MM:SS"的时间字符串
time_str: 格式为"HH:MM:SS,mmm"的时间字符串例如"00:01:23,456"
返回:
转换后的秒数
转换后的秒数(float)
"""
time_obj = datetime.strptime(time_str, "%M:%S")
return time_obj.minute * 60 + time_obj.second
try:
# 分离时间和毫秒
time_part, ms_part = time_str.split(',')
# 转换时分秒
time_obj = datetime.strptime(time_part, "%H:%M:%S")
# 计算总秒数
total_seconds = time_obj.hour * 3600 + time_obj.minute * 60 + time_obj.second
# 添加毫秒部分
total_seconds += int(ms_part) / 1000
return total_seconds
except ValueError as e:
raise ValueError("时间格式错误,请使用 HH:MM:SS,mmm 格式,例如 00:01:23,456") from e
def format_duration(seconds: float) -> str:
@ -24,40 +35,88 @@ def format_duration(seconds: float) -> str:
参数:
seconds: 秒数
返回:
格式化的时间字符串 (MM:SS)
格式化的时间字符串 (HH:MM:SS,mmm)
"""
minutes = int(seconds // 60)
remaining_seconds = int(seconds % 60)
return f"{minutes:02d}:{remaining_seconds:02d}"
hours = int(seconds // 3600)
minutes = int((seconds % 3600) // 60)
seconds_remain = seconds % 60
whole_seconds = int(seconds_remain)
milliseconds = int((seconds_remain - whole_seconds) * 1000)
return f"{hours:02d}:{minutes:02d}:{whole_seconds:02d},{milliseconds:03d}"
def cut_video(video_path: str, start_time: str, end_time: str) -> None:
def cut_video(video_path: str, start_time: str, end_time: str, output_path: str) -> None:
"""
剪辑视频
参数:
video_path: 视频文件路径
start_time: 开始时间 (格式: "MM:SS")
end_time: 结束时间 (格式: "MM:SS")
start_time: 开始时间 (格式: "HH:MM:SS,mmm")
end_time: 结束时间 (格式: "HH:MM:SS,mmm")
output_path: 输出文件路径
"""
# 转换时间字符串为秒数
start_seconds = time_str_to_seconds(start_time)
end_seconds = time_str_to_seconds(end_time)
# 加载视频文件
video = VideoFileClip(video_path)
# 计算剪辑时长
clip_duration = end_seconds - start_seconds
print(f"原视频总长度: {format_duration(video.duration)}")
print(f"剪辑时长: {format_duration(clip_duration)}")
# 剪辑视频
video = video.subclip(start_seconds, end_seconds)
video.write_videofile("../../resource/videos/cut_video2.mp4")
# 释放资源
video.close()
try:
# 确保输出目录存在
output_dir = os.path.dirname(output_path)
if not os.path.exists(output_dir):
os.makedirs(output_dir)
# 如果输出文件已存在,先尝试删除
if os.path.exists(output_path):
try:
os.remove(output_path)
except PermissionError:
print(f"无法删除已存在的文件:{output_path},请确保文件未被其他程序占用")
return
# 转换时间字符串为秒数
start_seconds = time_str_to_seconds(start_time)
end_seconds = time_str_to_seconds(end_time)
# 加载视频文件
video = VideoFileClip(video_path)
# 验证时间范围
if start_seconds >= video.duration or end_seconds > video.duration:
raise ValueError(f"剪辑时间超出视频长度!视频总长度为: {format_duration(video.duration)}")
if start_seconds >= end_seconds:
raise ValueError("结束时间必须大于开始时间!")
# 计算剪辑时长
clip_duration = end_seconds - start_seconds
print(f"原视频总长度: {format_duration(video.duration)}")
print(f"剪辑时长: {format_duration(clip_duration)}")
print(f"剪辑区间: {start_time} -> {end_time}")
# 剪辑视频
video = video.subclip(start_seconds, end_seconds)
# 添加错误处理的写入过程
try:
video.write_videofile(
output_path,
codec='libx264',
audio_codec='aac',
temp_audiofile='temp-audio.m4a',
remove_temp=True
)
except IOError as e:
print(f"写入视频文件时发生错误:{str(e)}")
raise
finally:
# 确保资源被释放
video.close()
except Exception as e:
print(f"视频剪辑过程中发生错误:{str(e)}")
raise
if __name__ == "__main__":
cut_video("../../resource/videos/best.mp4", "00:40", "02:40")
cut_video(
video_path="/Users/apple/Desktop/NarratoAI/resource/videos/duanju_yuansp.mp4",
start_time="00:00:00,789",
end_time="00:02:00,123",
output_path="/Users/apple/Desktop/NarratoAI/resource/videos/duanju_yuansp_cut3.mp4"
)

105
app/test/test_qwen.py Normal file
View File

@ -0,0 +1,105 @@
import os
import traceback
import json
from openai import OpenAI
from pydantic import BaseModel
from typing import List
from app.utils import utils
from app.services.subtitle import extract_audio_and_create_subtitle
class Step(BaseModel):
    # One entry of the clip script. The field names match the JSON keys of the
    # example given in this script's system prompt.
    # NOTE(review): this model is not referenced in the visible part of the
    # script — presumably intended for structured-output parsing; confirm.
    timestamp: str  # time range in the source video, "start-end"
    picture: str  # short description of the picture
    narration: str  # narration text for the clip
    OST: int  # integer flag; the prompt example uses 0 and 2 — meaning not shown here
    new_timestamp: str  # time range of the clip in the edited output, "start-end"
class MathReasoning(BaseModel):
    # Wrapper model holding the full list of Step entries.
    # NOTE(review): the name does not describe the content (a clip script) and
    # the class is unused in the visible code — consider renaming or removing.
    result: List[Step]
def chat_with_qwen(prompt: str, system_message: str, subtitle_path: str) -> str:
    """Send a subtitle file to the Qwen chat model and return its reply.

    The content of ``subtitle_path`` is appended to ``prompt`` and sent as the
    user message; ``system_message`` is sent as the system message.

    Args:
        prompt: User question or instruction placed before the subtitles.
        system_message: System prompt that sets the assistant's behavior.
        subtitle_path: Path of the UTF-8 subtitle file to analyze.

    Returns:
        The text content of the first completion choice.

    Raises:
        ValueError: if the DASHSCOPE_API_KEY environment variable is not set.
        Exception: if reading the file or the API call fails; the original
            error is attached as the cause.
    """
    # SECURITY: the API key used to be hard-coded here. It is now taken from
    # the environment; the key that was committed must be treated as leaked
    # and revoked.
    api_key = os.environ.get("DASHSCOPE_API_KEY")
    if not api_key:
        raise ValueError("未设置环境变量 DASHSCOPE_API_KEY")

    try:
        client = OpenAI(
            api_key=api_key,
            base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
        )

        # Read the subtitle file.
        with open(subtitle_path, "r", encoding="utf-8") as file:
            subtitle_content = file.read()

        completion = client.chat.completions.create(
            model="qwen-turbo-2024-11-01",
            messages=[
                {'role': 'system', 'content': system_message},
                {'role': 'user', 'content': prompt + subtitle_content}
            ]
        )
        return completion.choices[0].message.content

    except Exception as e:
        error_message = f"调用千问API时发生错误{str(e)}"
        print(error_message)
        print("请参考文档https://help.aliyun.com/zh/model-studio/developer-reference/error-code")
        # Keep the original traceback available to the caller.
        raise Exception(error_message) from e
# Usage example: transcribe a video to SRT, then ask Qwen to pick highlight clips.
if __name__ == "__main__":
    try:
        video_path = utils.video_dir("duanju_yuansp.mp4")
        # # Check that the video exists
        # if not os.path.exists(video_path):
        #     print(f"视频文件不存在:{video_path}")
        #     exit(1)

        # Extract the subtitles.
        subtitle_path = os.path.join(utils.video_dir(""), "duanju_yuan.srt")
        extract_audio_and_create_subtitle(video_file=video_path, subtitle_file=subtitle_path)

        # Analyze the subtitles. The example in the prompt must itself be valid
        # JSON with one consistent timestamp format (HH:MM:SS,mmm), otherwise
        # the model is shown a contradictory target.
        system_message = """
你是一个视频srt字幕分析剪辑器, 输入视频的srt字幕, 分析其中的精彩且尽可能连续的片段并裁剪出来, 注意确保文字与时间戳的正确匹配
输出需严格按照如下 json 格式:
[
{
"timestamp": "00:00:50,020-00:01:44,000",
"picture": "画面1",
"narration": "播放原声",
"OST": 0,
"new_timestamp": "00:00:00,000-00:00:53,980"
},
{
"timestamp": "00:01:49,000-00:02:30,000",
"picture": "画面2",
"narration": "播放原声",
"OST": 2,
"new_timestamp": "00:00:53,980-00:01:34,980"
}
]
"""
        prompt = "字幕如下:\n"
        response = chat_with_qwen(prompt, system_message, subtitle_path)
        print(response)
        # Saving the JSON is disabled; the old note said timestamps would have
        # to be converted to minute:second form first.
        # response = json.loads(response)
        # for item in response:
        #     item["timestamp"] = item["timestamp"].replace(":", "-")
        # with open(os.path.join(utils.video_dir(""), "duanju_yuan.json"), "w", encoding="utf-8") as file:
        #     json.dump(response, file, ensure_ascii=False)

    except Exception:
        print(traceback.format_exc())

View File

@ -10,6 +10,7 @@ from google.api_core import exceptions
import google.generativeai as genai
import PIL.Image
import traceback
from app.utils import utils
class VisionAnalyzer:
@ -146,14 +147,34 @@ class VisionAnalyzer:
response_text = result['response']
image_paths = result['image_paths']
img_name_start = Path(image_paths[0]).stem.split('_')[-1]
img_name_end = Path(image_paths[-1]).stem.split('_')[-1]
txt_path = os.path.join(output_dir, f"frame_{img_name_start}_{img_name_end}.txt")
# 从文件名中提取时间戳并转换为标准格式
def format_timestamp(img_path):
    """Turn the time suffix of a frame file name into "HH:MM:SS,mmm".

    The suffix is the text after the last underscore of the file stem.
    A compact all-digit suffix is decoded positionally: 9 digits as
    HHMMSSmmm, 6 digits as HHMMSS. Passing such a suffix to a generic
    "seconds" parser would read it as one huge number of seconds. Any other
    suffix is handed to utils.time_to_seconds as before.

    Returns:
        The formatted timestamp, or the raw suffix if conversion fails.
    """
    # Extract the time part from the file name.
    timestamp = Path(img_path).stem.split('_')[-1]
    try:
        if timestamp.isascii() and timestamp.isdigit() and len(timestamp) in (6, 9):
            hours = int(timestamp[0:2])
            minutes = int(timestamp[2:4])
            whole_seconds = int(timestamp[4:6])
            milliseconds = int(timestamp[6:9]) if len(timestamp) == 9 else 0
        else:
            # Fallback for delimiter-based formats.
            seconds = utils.time_to_seconds(timestamp.replace('_', ':'))
            hours = int(seconds // 3600)
            minutes = int((seconds % 3600) // 60)
            seconds_remainder = seconds % 60
            whole_seconds = int(seconds_remainder)
            milliseconds = int((seconds_remainder - whole_seconds) * 1000)
        return f"{hours:02d}:{minutes:02d}:{whole_seconds:02d},{milliseconds:03d}"
    except Exception as e:
        logger.error(f"时间戳格式转换错误: {timestamp}, {str(e)}")
        return timestamp
start_timestamp = format_timestamp(image_paths[0])
end_timestamp = format_timestamp(image_paths[-1])
txt_path = os.path.join(output_dir, f"frame_{start_timestamp}_{end_timestamp}.txt")
# 保存结果到txt文件
with open(txt_path, 'w', encoding='utf-8') as f:
f.write(response_text.strip())
print(f"已保存分析结果到: {txt_path}")
logger.info(f"已保存分析结果到: {txt_path}")
def load_images(self, image_paths: List[str]) -> List[PIL.Image.Image]:
"""

View File

@ -0,0 +1,265 @@
import json
from typing import List, Union, Dict
import os
from pathlib import Path
from loguru import logger
from tqdm import tqdm
import asyncio
from tenacity import retry, stop_after_attempt, RetryError, wait_exponential
from openai import OpenAI
import PIL.Image
import base64
import io
import traceback
class QwenAnalyzer:
    """Image analyzer that calls a Qwen vision model through the OpenAI client."""

    def __init__(self, model_name: str = "qwen-vl-max-latest", api_key: str = None, base_url: str = None):
        """Store the settings and create the API client.

        Args:
            model_name: Model name, defaults to qwen-vl-max-latest.
            api_key: API key; required.
            base_url: API base URL; the DashScope compatible-mode endpoint is
                used when None.

        Raises:
            ValueError: if no API key is given.
        """
        if not api_key:
            raise ValueError("必须提供API密钥")

        self.model_name = model_name
        self.api_key = api_key
        self.base_url = base_url or "https://dashscope.aliyuncs.com/compatible-mode/v1"

        self._configure_client()

    def _configure_client(self):
        """Create the OpenAI client from the stored key and base URL."""
        try:
            self.client = OpenAI(
                api_key=self.api_key,
                base_url=self.base_url
            )
        except Exception as e:
            logger.error(f"初始化OpenAI客户端失败: {str(e)}")
            raise

    def _image_to_base64(self, image: PIL.Image.Image) -> str:
        """Encode a PIL image as a base64 JPEG string."""
        # JPEG cannot store alpha or palette modes; caller-supplied images are
        # not guaranteed to be RGB.
        if image.mode != 'RGB':
            image = image.convert('RGB')
        buffered = io.BytesIO()
        image.save(buffered, format="JPEG")
        return base64.b64encode(buffered.getvalue()).decode("utf-8")

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=4, max=10)
    )
    async def _generate_content_with_retry(self, prompt: str, batch: List[PIL.Image.Image]):
        """Send one batch of images plus the prompt; retried by tenacity.

        Returns:
            The text content of the first completion choice.
        """
        try:
            # Images first, then the text prompt.
            content = [
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{self._image_to_base64(img)}"
                    }
                }
                for img in batch
            ]
            content.append({
                "type": "text",
                "text": prompt
            })

            # The client call is blocking, so run it in a worker thread.
            response = await asyncio.to_thread(
                self.client.chat.completions.create,
                model=self.model_name,
                messages=[{
                    "role": "user",
                    "content": content
                }]
            )
            return response.choices[0].message.content

        except Exception as e:
            logger.error(f"API调用错误: {str(e)}")
            # Re-raise the real error so the retry decorator records the
            # actual cause instead of a hand-built placeholder.
            raise

    async def analyze_images(self,
                             images: Union[List[str], List[PIL.Image.Image]],
                             prompt: str,
                             batch_size: int = 5) -> List[Dict]:
        """Analyze images in batches.

        Args:
            images: List of image paths or list of PIL image objects.
            prompt: Prompt sent with every batch.
            batch_size: Number of images per request.

        Returns:
            One dict per batch. Successful batches carry 'response'; batches
            that failed three times carry 'error' instead. 'image_paths' is
            included when the input was a list of paths.

        Raises:
            Exception: on invalid input or unexpected failure; the message
                contains the traceback text.
        """
        try:
            # Check emptiness before touching images[0].
            if not images:
                raise ValueError("图片列表为空")
            if batch_size < 1:
                raise ValueError("batch_size 必须大于 0")

            valid_images = []
            valid_paths = []
            if isinstance(images[0], str):
                logger.info("正在加载图片...")
                # Paths and images are loaded as pairs so that an image that
                # fails to load cannot shift the path of every later image.
                for path, img in self._load_images_with_paths(images):
                    valid_paths.append(path)
                    valid_images.append(img)
            else:
                for idx, img in enumerate(images):
                    if not isinstance(img, PIL.Image.Image):
                        logger.error(f"无效的图片对象,索引 {idx}: {type(img)}")
                        continue
                    valid_images.append(img)

            if not valid_images:
                raise ValueError("没有有效的图片对象")

            images = valid_images
            results = []
            total_batches = (len(images) + batch_size - 1) // batch_size

            with tqdm(total=total_batches, desc="分析进度") as pbar:
                for i in range(0, len(images), batch_size):
                    batch = images[i:i + batch_size]
                    batch_paths = valid_paths[i:i + batch_size] if valid_paths else None
                    batch_index = i // batch_size

                    retry_count = 0
                    while retry_count < 3:
                        try:
                            # Small delay before every batch except the first.
                            if i > 0:
                                await asyncio.sleep(2)

                            response = await self._generate_content_with_retry(prompt, batch)
                            result_dict = {
                                'batch_index': batch_index,
                                'images_processed': len(batch),
                                'response': response,
                                'model_used': self.model_name
                            }
                            if batch_paths:
                                result_dict['image_paths'] = batch_paths

                            results.append(result_dict)
                            break

                        except Exception as e:
                            retry_count += 1
                            error_msg = f"批次 {batch_index} 处理出错: {str(e)}"
                            logger.error(error_msg)

                            if retry_count >= 3:
                                results.append({
                                    'batch_index': batch_index,
                                    'images_processed': len(batch),
                                    'error': error_msg,
                                    'model_used': self.model_name,
                                    'image_paths': batch_paths if batch_paths else []
                                })
                            else:
                                logger.info(f"批次 {batch_index} 处理失败等待60秒后重试当前批次...")
                                await asyncio.sleep(60)

                    pbar.update(1)

            return results

        except Exception as e:
            error_msg = f"图片分析过程中发生错误: {str(e)}\n{traceback.format_exc()}"
            logger.error(error_msg)
            raise Exception(error_msg) from e

    def save_results_to_txt(self, results: List[Dict], output_dir: str):
        """Write each batch response to a txt file in ``output_dir``.

        Files are named frame_<first>_<last>.txt from the last underscore
        segment of the first and last image file names, or batch_<index>.txt
        when no paths are available. Results without a 'response' (failed
        batches) are skipped with a warning.
        """
        os.makedirs(output_dir, exist_ok=True)

        for result in results:
            response_text = result.get('response')
            if response_text is None:
                # Failed batches only carry 'error'; there is nothing to save.
                logger.warning(
                    f"批次 {result.get('batch_index')} 没有分析结果,跳过保存: {result.get('error', '')}"
                )
                continue

            if result.get('image_paths'):
                image_paths = result['image_paths']
                img_name_start = Path(image_paths[0]).stem.split('_')[-1]
                img_name_end = Path(image_paths[-1]).stem.split('_')[-1]
                file_name = f"frame_{img_name_start}_{img_name_end}.txt"
            else:
                file_name = f"batch_{result['batch_index']}.txt"

            txt_path = os.path.join(output_dir, file_name)

            with open(txt_path, 'w', encoding='utf-8') as f:
                f.write(response_text.strip())
            logger.info(f"已保存分析结果到: {txt_path}")

    def _load_images_with_paths(self, image_paths: List[str]) -> List[tuple]:
        """Load images and return (path, image) pairs for those that loaded.

        Raises:
            ValueError: if no image could be loaded.
        """
        loaded = []
        failed_images = []

        for img_path in image_paths:
            try:
                if not os.path.exists(img_path):
                    logger.error(f"图片文件不存在: {img_path}")
                    failed_images.append(img_path)
                    continue

                img = PIL.Image.open(img_path)
                # Force the pixel data to be read now.
                img.load()
                if img.mode != 'RGB':
                    img = img.convert('RGB')
                loaded.append((img_path, img))

            except Exception as e:
                logger.error(f"无法加载图片 {img_path}: {str(e)}")
                failed_images.append(img_path)

        if failed_images:
            logger.warning(f"以下图片加载失败:\n{json.dumps(failed_images, indent=2, ensure_ascii=False)}")

        if not loaded:
            raise ValueError("没有成功加载任何图片")

        return loaded

    def load_images(self, image_paths: List[str]) -> List[PIL.Image.Image]:
        """Load several images as RGB PIL images.

        Args:
            image_paths: List of image paths.

        Returns:
            The successfully loaded images, in input order.

        Raises:
            ValueError: if no image could be loaded.
        """
        return [img for _, img in self._load_images_with_paths(image_paths)]

View File

@ -374,22 +374,65 @@ class ScriptProcessor:
记住要敢于用"温和的违反"制造笑点但要把握好尺度让观众在轻松愉快中感受到乐趣"""
def calculate_duration_and_word_count(self, time_range: str) -> int:
"""
计算时间范围的持续时长并估算合适的字数
Args:
time_range: 时间范围字符串,格式为 "HH:MM:SS,mmm-HH:MM:SS,mmm"
例如: "00:00:50,100-00:01:21,500"
Returns:
int: 估算的合适字数
基于经验公式: 每0.35秒可以说一个字
例如: 10秒可以说约28个字 (10/0.35≈28.57)
"""
try:
start_str, end_str = time_range.split('-')
def time_to_seconds(time_str):
minutes, seconds = map(int, time_str.split(':'))
return minutes * 60 + seconds
def time_to_seconds(time_str: str) -> float:
"""
将时间字符串转换为秒数(带毫秒精度)
Args:
time_str: 时间字符串,格式为 "HH:MM:SS,mmm"
例如: "00:00:50,100" 表示50.1
Returns:
float: 转换后的秒数(带毫秒)
"""
try:
# 处理毫秒部分
time_part, ms_part = time_str.split(',')
hours, minutes, seconds = map(int, time_part.split(':'))
milliseconds = int(ms_part)
# 转换为秒
total_seconds = (hours * 3600) + (minutes * 60) + seconds + (milliseconds / 1000)
return total_seconds
except ValueError as e:
logger.warning(f"时间格式解析错误: {time_str}, error: {e}")
return 0.0
# 计算开始和结束时间的秒数
start_seconds = time_to_seconds(start_str)
end_seconds = time_to_seconds(end_str)
# 计算持续时间(秒)
duration = end_seconds - start_seconds
word_count = int(duration / 0.35)
# 根据经验公式计算字数: 每0.4秒一个字
word_count = int(duration / 0.4)
# 确保字数在合理范围内
word_count = max(10, min(word_count, 500)) # 限制在10-500字之间
logger.debug(f"时间范围 {time_range} 的持续时间为 {duration:.3f}秒, 估算字数: {word_count}")
return word_count
except Exception as e:
logger.info(f"时间格式转换错误: {traceback.format_exc()}")
return 100
logger.warning(f"字数计算错误: {traceback.format_exc()}")
return 100 # 发生错误时返回默认字数
def process_frames(self, frame_content_list: List[Dict]) -> List[Dict]:
for frame_content in frame_content_list:
@ -406,22 +449,47 @@ class ScriptProcessor:
def _save_results(self, frame_content_list: List[Dict]):
"""保存处理结果,并添加新的时间戳"""
try:
# 转换秒数为 MM:SS 格式
def seconds_to_time(seconds):
minutes = seconds // 60
remaining_seconds = seconds % 60
return f"{minutes:02d}:{remaining_seconds:02d}"
def format_timestamp(seconds: float) -> str:
"""将秒数转换为 HH:MM:SS,mmm 格式"""
hours = int(seconds // 3600)
minutes = int((seconds % 3600) // 60)
seconds_remainder = seconds % 60
whole_seconds = int(seconds_remainder)
milliseconds = int((seconds_remainder - whole_seconds) * 1000)
return f"{hours:02d}:{minutes:02d}:{whole_seconds:02d},{milliseconds:03d}"
# 计算新的时间戳
current_time = 0 # 当前时间点(秒)
current_time = 0.0 # 当前时间点(秒,包含毫秒)
for frame in frame_content_list:
# 获取原始时间戳的持续时间
start_str, end_str = frame['timestamp'].split('-')
def time_to_seconds(time_str):
minutes, seconds = map(int, time_str.split(':'))
return minutes * 60 + seconds
def time_to_seconds(time_str: str) -> float:
"""将时间字符串转换为秒数(包含毫秒)"""
try:
if ',' in time_str:
time_part, ms_part = time_str.split(',')
ms = float(ms_part) / 1000
else:
time_part = time_str
ms = 0
parts = time_part.split(':')
if len(parts) == 3: # HH:MM:SS
h, m, s = map(float, parts)
seconds = h * 3600 + m * 60 + s
elif len(parts) == 2: # MM:SS
m, s = map(float, parts)
seconds = m * 60 + s
else: # SS
seconds = float(parts[0])
return seconds + ms
except Exception as e:
logger.error(f"时间格式转换错误 {time_str}: {str(e)}")
return 0.0
# 计算当前片段的持续时间
start_seconds = time_to_seconds(start_str)
@ -429,8 +497,8 @@ class ScriptProcessor:
duration = end_seconds - start_seconds
# 设置新的时间戳
new_start = seconds_to_time(current_time)
new_end = seconds_to_time(current_time + duration)
new_start = format_timestamp(current_time)
new_end = format_timestamp(current_time + duration)
frame['new_timestamp'] = f"{new_start}-{new_end}"
# 更新当前时间点
@ -443,7 +511,7 @@ class ScriptProcessor:
with open(file_name, 'w', encoding='utf-8') as file:
json.dump(frame_content_list, file, ensure_ascii=False, indent=4)
logger.info(f"保存脚本成功,总时长: {seconds_to_time(current_time)}")
logger.info(f"保存脚本成功,总时长: {format_timestamp(current_time)}")
except Exception as e:
logger.error(f"保存结果时发生错误: {str(e)}\n{traceback.format_exc()}")

View File

@ -40,7 +40,7 @@ def to_json(obj):
# 如果对象是二进制数据转换为base64编码的字符串
elif isinstance(o, bytes):
return "*** binary data ***"
# 如果象是字典,递归处理每个键值对
# 如果对象是字典,递归处理每个键值对
elif isinstance(o, dict):
return {k: serialize(v) for k, v in o.items()}
# 如果对象是列表或元组,递归处理每个元素
@ -56,7 +56,7 @@ def to_json(obj):
# 使用serialize函数处理输入对象
serialized_obj = serialize(obj)
# 序列化处理后的对象为JSON<EFBFBD><EFBFBD><EFBFBD>符串
# 序列化处理后的对象为JSON字符串
return json.dumps(serialized_obj, ensure_ascii=False, indent=4)
except Exception as e:
return None
@ -126,6 +126,15 @@ def public_dir(sub_dir: str = ""):
return d
def srt_dir(sub_dir: str = ""):
    """Return the "srt" resource directory, creating it if needed.

    Args:
        sub_dir: Optional sub-path appended to the srt directory.

    Returns:
        The path of the resulting directory.
    """
    d = resource_dir("srt")
    if sub_dir:
        d = os.path.join(d, sub_dir)
    # exist_ok avoids the check-then-create race when two callers create the
    # directory at the same time.
    os.makedirs(d, exist_ok=True)
    return d
def run_in_background(func, *args, **kwargs):
def run():
try:
@ -302,15 +311,49 @@ def get_current_country():
def time_to_seconds(time_str: str) -> float:
parts = time_str.split(':')
if len(parts) == 2:
m, s = map(float, parts)
return m * 60 + s
elif len(parts) == 3:
h, m, s = map(float, parts)
return h * 3600 + m * 60 + s
else:
raise ValueError(f"Invalid time format: {time_str}")
"""
将时间字符串转换为秒数支持多种格式
- "HH:MM:SS,mmm" -> 小时:分钟:,毫秒
- "MM:SS,mmm" -> 分钟:,毫秒
- "SS,mmm" -> ,毫秒
- "SS-mmm" -> -毫秒
Args:
time_str: 时间字符串
Returns:
float: 转换后的秒数(包含毫秒)
"""
try:
# 处理带有'-'的毫秒格式
if '-' in time_str:
time_part, ms_part = time_str.split('-')
ms = float(ms_part) / 1000
# 处理带有','的毫秒格式
elif ',' in time_str:
time_part, ms_part = time_str.split(',')
ms = float(ms_part) / 1000
else:
time_part = time_str
ms = 0
# 分割时间部分
parts = time_part.split(':')
if len(parts) == 3: # HH:MM:SS
h, m, s = map(float, parts)
seconds = h * 3600 + m * 60 + s
elif len(parts) == 2: # MM:SS
m, s = map(float, parts)
seconds = m * 60 + s
else: # SS
seconds = float(parts[0])
return seconds + ms
except (ValueError, IndexError) as e:
logger.error(f"时间格式转换错误 {time_str}: {str(e)}")
return 0.0
def seconds_to_time(seconds: float) -> str:
@ -320,15 +363,25 @@ def seconds_to_time(seconds: float) -> str:
def calculate_total_duration(scenes):
"""
计算场景列表的总时长
Args:
scenes: 场景列表每个场景包含 timestamp 字段格式如 "00:00:28,350-00:00:41,000"
Returns:
float: 总时长
"""
total_seconds = 0
for scene in scenes:
start, end = scene['timestamp'].split('-')
start_time = datetime.strptime(start, '%M:%S')
end_time = datetime.strptime(end, '%M:%S')
# 使用 time_to_seconds 函数处理更精确的时间格式
start_seconds = time_to_seconds(start)
end_seconds = time_to_seconds(end)
duration = end_time - start_time
total_seconds += duration.total_seconds()
duration = end_seconds - start_seconds
total_seconds += duration
return total_seconds
@ -451,7 +504,7 @@ def clear_keyframes_cache(video_path: str = None):
return
if video_path:
# <EFBFBD><EFBFBD><EFBFBD>理指定视频的缓存
# 清理指定视频的缓存
video_hash = md5(video_path + str(os.path.getmtime(video_path)))
video_keyframes_dir = os.path.join(keyframes_dir, video_hash)
if os.path.exists(video_keyframes_dir):
@ -520,3 +573,21 @@ def download_font(url: str, font_path: str):
except Exception as e:
logger.error(f"下载字体文件失败: {e}")
raise
def init_imagemagick():
    """Check for the ``magick`` command and register it for later use.

    Runs ``magick -version``; on a zero exit code the IMAGEMAGICK_BINARY
    environment variable is set to 'magick'.

    Returns:
        True if the command ran successfully, False otherwise. Failures are
        logged, never raised.
    """
    try:
        import subprocess

        probe = subprocess.run(['magick', '-version'], capture_output=True, text=True)
        if probe.returncode == 0:
            # Export the binary name through the environment.
            os.environ['IMAGEMAGICK_BINARY'] = 'magick'
            return True

        logger.error("ImageMagick 未安装或配置不正确")
        return False
    except Exception as e:
        logger.error(f"初始化 ImageMagick 失败: {str(e)}")
        return False

View File

@ -51,21 +51,34 @@ class VideoProcessor:
def detect_shot_boundaries(self, frames: List[np.ndarray], threshold: int = 30) -> List[int]:
"""
使用帧差法检测镜头边界
Args:
frames: 视频帧列表
threshold: 差异阈值
threshold: 差异阈值默认值调低为30
Returns:
List[int]: 镜头边界帧的索引列表
"""
shot_boundaries = []
if len(frames) < 2: # 添加帧数检查
logger.warning("视频帧数过少,无法检测场景边界")
return [len(frames) - 1] # 返回最后一帧作为边界
for i in range(1, len(frames)):
prev_frame = cv2.cvtColor(frames[i - 1], cv2.COLOR_BGR2GRAY)
curr_frame = cv2.cvtColor(frames[i], cv2.COLOR_BGR2GRAY)
diff = np.mean(np.abs(curr_frame.astype(int) - prev_frame.astype(int)))
# 计算帧差
diff = np.mean(np.abs(curr_frame.astype(float) - prev_frame.astype(float)))
if diff > threshold:
shot_boundaries.append(i)
# 如果没有检测到任何边界,至少返回最后一帧
if not shot_boundaries:
logger.warning("未检测到场景边界,将视频作为单个场景处理")
shot_boundaries.append(len(frames) - 1)
return shot_boundaries
def extract_keyframes(self, frames: List[np.ndarray], shot_boundaries: List[int]) -> Tuple[
@ -113,12 +126,7 @@ class VideoProcessor:
output_dir: str, desc: str = "保存关键帧") -> None:
"""
保存关键帧到指定目录文件名格式为keyframe_帧序号_时间戳.jpg
Args:
keyframes: 关键帧列表
keyframe_indices: 关键帧索引列表
output_dir: 输出目录
desc: 进度条描述
时间戳精确到毫秒格式为HHMMSSmmm
"""
if not os.path.exists(output_dir):
os.makedirs(output_dir)
@ -126,11 +134,13 @@ class VideoProcessor:
for keyframe, frame_idx in tqdm(zip(keyframes, keyframe_indices),
total=len(keyframes),
desc=desc):
# 计算精确到毫秒的时间戳
timestamp = frame_idx / self.fps
hours = int(timestamp // 3600)
minutes = int((timestamp % 3600) // 60)
seconds = int(timestamp % 60)
time_str = f"{hours:02d}{minutes:02d}{seconds:02d}"
milliseconds = int((timestamp % 1) * 1000) # 计算毫秒部分
time_str = f"{hours:02d}{minutes:02d}{seconds:02d}{milliseconds:03d}"
output_path = os.path.join(output_dir,
f'keyframe_{frame_idx:06d}_{time_str}.jpg')
@ -138,11 +148,7 @@ class VideoProcessor:
def extract_frames_by_numbers(self, frame_numbers: List[int], output_folder: str) -> None:
"""
根据指定的帧号提取帧如果多个帧在同一秒内只保留一个
Args:
frame_numbers: 要提取的帧号列表
output_folder: 输出文件夹路径
根据指定的帧号提取帧如果多个帧在同一毫秒内只保留一个
"""
if not frame_numbers:
raise ValueError("未提供帧号列表")
@ -153,29 +159,31 @@ class VideoProcessor:
if not os.path.exists(output_folder):
os.makedirs(output_folder)
# 用于记录已处理的时间戳(秒)
processed_seconds = set()
# 用于记录已处理的时间戳(毫秒)
processed_timestamps = set()
for frame_number in tqdm(frame_numbers, desc="提取高清帧"):
# 计算时间戳(秒)
timestamp_seconds = int(frame_number / self.fps)
# 计算精确到毫秒的时间戳
timestamp = frame_number / self.fps
timestamp_ms = int(timestamp * 1000) # 转换为毫秒
# 如果这一秒已经处理过,跳过
if timestamp_seconds in processed_seconds:
# 如果这一毫秒已经处理过,跳过
if timestamp_ms in processed_timestamps:
continue
self.cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
ret, frame = self.cap.read()
if ret:
# 记录这一秒已经处理
processed_seconds.add(timestamp_seconds)
# 记录这一毫秒已经处理
processed_timestamps.add(timestamp_ms)
# 计算时间戳字符串
hours = int(timestamp_seconds // 3600)
minutes = int((timestamp_seconds % 3600) // 60)
seconds = int(timestamp_seconds % 60)
time_str = f"{hours:02d}{minutes:02d}{seconds:02d}"
hours = int(timestamp // 3600)
minutes = int((timestamp % 3600) // 60)
seconds = int(timestamp % 60)
milliseconds = int((timestamp % 1) * 1000) # 计算毫秒部分
time_str = f"{hours:02d}{minutes:02d}{seconds:02d}{milliseconds:03d}"
output_path = os.path.join(output_folder,
f"keyframe_{frame_number:06d}_{time_str}.jpg")
@ -183,27 +191,34 @@ class VideoProcessor:
else:
logger.info(f"无法读取帧 {frame_number}")
logger.info(f"共提取了 {len(processed_seconds)} 个不同时间戳的帧")
logger.info(f"共提取了 {len(processed_timestamps)} 个不同时间戳的帧")
@staticmethod
def extract_numbers_from_folder(folder_path: str) -> List[int]:
"""
从文件夹中提取帧号
Args:
folder_path: 关键帧文件夹路径
Returns:
List[int]: 排序后的帧号列表
"""
files = [f for f in os.listdir(folder_path) if f.endswith('.jpg')]
# 更新正则表达式以匹配新的文件名格式keyframe_000123_010534.jpg
pattern = re.compile(r'keyframe_(\d+)_\d+\.jpg$')
# 更新正则表达式以匹配新的文件名格式keyframe_000123_010534123.jpg
pattern = re.compile(r'keyframe_(\d+)_\d{9}\.jpg$')
numbers = []
for f in files:
match = pattern.search(f)
if match:
numbers.append(int(match.group(1)))
else:
logger.warning(f"文件名格式不匹配: {f}")
if not numbers:
logger.error(f"在目录 {folder_path} 中未找到有效的关键帧文件")
return sorted(numbers)
def process_video(self, output_dir: str, skip_seconds: float = 0, threshold: int = 30) -> None:
@ -212,7 +227,7 @@ class VideoProcessor:
Args:
output_dir: 输出目录
skip_seconds: 跳过视<EFBFBD><EFBFBD><EFBFBD>开头的秒数
skip_seconds: 跳过视频开头的秒数
"""
skip_frames = int(skip_seconds * self.fps)
@ -240,11 +255,14 @@ class VideoProcessor:
def process_video_pipeline(self,
output_dir: str,
skip_seconds: float = 0,
threshold: int = 30,
threshold: int = 20, # 降低默认阈值
compressed_width: int = 320,
keep_temp: bool = False) -> None:
"""
执行完整的视频处理流程压缩提取关键帧导出高清帧
执行完整的视频处理流程
Args:
threshold: 降低默认阈值为20使场景检测更敏感
"""
os.makedirs(output_dir, exist_ok=True)
temp_dir = os.path.join(output_dir, 'temp')
@ -358,7 +376,7 @@ if __name__ == "__main__":
import time
start_time = time.time()
processor = VideoProcessor("best.mp4")
processor.process_video_pipeline(output_dir="output4")
processor = VideoProcessor("E:\\projects\\NarratoAI\\resource\\videos\\test.mp4")
processor.process_video_pipeline(output_dir="output")
end_time = time.time()
print(f"处理完成!总耗时: {end_time - start_time:.2f}")

View File

@ -1,5 +1,5 @@
[app]
project_version="0.3.5"
project_version="0.3.9"
# 支持视频理解的大模型提供商
# gemini
# NarratoAPI

View File

@ -1,3 +1,4 @@
import os
import uvicorn
from loguru import logger
@ -7,6 +8,8 @@ if __name__ == "__main__":
logger.info(
"start server, docs: http://127.0.0.1:" + str(config.listen_port) + "/docs"
)
os.environ["HTTP_PROXY"] = config.proxy.get("http")
os.environ["HTTPS_PROXY"] = config.proxy.get("https")
uvicorn.run(
app="app.asgi:app",
host=config.listen_host,

View File

@ -1,5 +1,5 @@
requests~=2.31.0
moviepy~=2.0.0.dev2
moviepy==2.0.0.dev2
faster-whisper~=1.0.1
edge_tts~=6.1.15
uvicorn~=0.27.1
@ -26,9 +26,12 @@ psutil>=5.9.0
opencv-python~=4.10.0.84
scikit-learn~=1.5.2
google-generativeai~=0.8.3
Pillow>=11.0.0
pillow==10.3.0
python-dotenv~=1.0.1
openai~=1.53.0
tqdm>=4.66.6
tenacity>=9.0.0
tiktoken==0.8.0
tiktoken==0.8.0
yt-dlp==2024.11.18
pysrt==1.1.2
httpx==0.27.2

View File

@ -0,0 +1 @@
此处放字体文件

View File

View File

View File

View File

178
video_pipeline.py Normal file
View File

@ -0,0 +1,178 @@
import requests
import json
import os
import time
from typing import Dict, Any
class VideoPipeline:
    """Client that drives the HTTP API through the whole narration workflow.

    The workflow is: download the video, generate a script, crop the source
    video into sub-clips, then start the final (asynchronous) video render.
    """

    def __init__(self, base_url: str = "http://127.0.0.1:8080", timeout: Any = None):
        """Create a pipeline client.

        Args:
            base_url: Root URL of the API server. Trailing slashes are removed
                so that endpoint paths are never joined with a double slash.
            timeout: Passed to every ``requests.post`` call (seconds, or a
                ``(connect, read)`` tuple). ``None`` keeps the previous
                behavior of waiting indefinitely.
        """
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout

    def _post(self, path: str, payload: Dict[str, Any], params: Dict[str, Any] = None) -> Dict[str, Any]:
        """POST ``payload`` as JSON to ``path`` and return the decoded JSON body.

        Raises:
            requests.HTTPError: If the server answers with a 4xx/5xx status.
        """
        response = requests.post(
            f"{self.base_url}{path}",
            params=params,
            json=payload,
            timeout=self.timeout,
        )
        response.raise_for_status()
        return response.json()

    def download_video(self, url: str, resolution: str = "1080p",
                       output_format: str = "mp4", rename: str = None) -> Dict[str, Any]:
        """Step 1: ask the server to download a video.

        Args:
            url: Video URL to download.
            resolution: Requested resolution label.
            output_format: Requested container format.
            rename: Name for the downloaded file; defaults to today's date
                formatted as ``YYYY-MM-DD`` when empty or ``None``.

        Returns:
            The decoded JSON response of the download endpoint.
        """
        payload = {
            "url": url,
            "resolution": resolution,
            "output_format": output_format,
            "rename": rename or time.strftime("%Y-%m-%d")
        }
        return self._post("/api/v2/youtube/download", payload)

    def generate_script(self, video_path: str, skip_seconds: int = 0,
                        threshold: int = 30, vision_batch_size: int = 10,
                        vision_llm_provider: str = "gemini") -> Dict[str, Any]:
        """Step 2: ask the server to generate a script for ``video_path``.

        Returns:
            The decoded JSON response of the script generation endpoint.
        """
        payload = {
            "video_path": video_path,
            "skip_seconds": skip_seconds,
            "threshold": threshold,
            "vision_batch_size": vision_batch_size,
            "vision_llm_provider": vision_llm_provider
        }
        return self._post("/api/v2/scripts/generate", payload)

    def crop_video(self, video_path: str, script: list) -> Dict[str, Any]:
        """Step 3: ask the server to cut the source video according to ``script``.

        Returns:
            The decoded JSON response of the crop endpoint.
        """
        payload = {
            "video_origin_path": video_path,
            "video_script": script
        }
        return self._post("/api/v2/scripts/crop", payload)

    def generate_final_video(self, task_id: str, video_path: str,
                             script_path: str, script: list,
                             subclip_videos: Dict[str, str], voice_name: str) -> Dict[str, Any]:
        """Step 4: start the final video generation task.

        Only the script, paths, sub-clips and voice name are configurable;
        every other rendering option is sent with the fixed values below.

        Returns:
            The decoded JSON response of the start-subclip endpoint.
        """
        request_data = {
            "video_clip_json": script,
            "video_clip_json_path": script_path,
            "video_origin_path": video_path,
            "video_aspect": "16:9",
            "video_language": "zh-CN",
            "voice_name": voice_name,
            "voice_volume": 1,
            "voice_rate": 1.2,
            "voice_pitch": 1,
            "bgm_name": "random",
            "bgm_type": "random",
            "bgm_file": "",
            "bgm_volume": 0.3,
            "subtitle_enabled": True,
            "subtitle_position": "bottom",
            "font_name": "STHeitiMedium.ttc",
            "text_fore_color": "#FFFFFF",
            "text_background_color": "transparent",
            "font_size": 75,
            "stroke_color": "#000000",
            "stroke_width": 1.5,
            "custom_position": 70,
            "n_threads": 8
        }
        payload = {
            "request": request_data,
            "subclip_videos": subclip_videos
        }
        # The task id travels as a query parameter, not in the JSON body.
        return self._post("/api/v2/scripts/start-subclip", payload, params={"task_id": task_id})

    def save_script_to_json(self, script: list, script_path: str) -> str:
        """Write ``script`` to ``script_path`` as UTF-8 JSON and return the path.

        Raises:
            Exception: Any error raised while writing is reported and re-raised.
        """
        try:
            with open(script_path, 'w', encoding='utf-8') as f:
                json.dump(script, f, ensure_ascii=False, indent=2)
            print(f"脚本已保存到: {script_path}")
            return script_path
        except Exception as e:
            print(f"保存脚本失败: {str(e)}")
            raise

    def run_pipeline(self, task_id: str, script_name: str, youtube_url: str,
                     video_name: str = "null", skip_seconds: int = 0,
                     threshold: int = 30, vision_batch_size: int = 10,
                     vision_llm_provider: str = "gemini",
                     voice_name: str = "zh-CN-YunjianNeural") -> Dict[str, Any]:
        """Run the complete workflow and report where the result will appear.

        The video is downloaded only when ``resource/videos/<video_name>.mp4``
        is missing next to this file, and the script is generated only when
        ``resource/scripts/<script_name>`` is missing.

        Returns:
            A dict with ``status`` and ``path`` once the render task has been
            started, or ``{"status": "error", "error": <message>}`` if any
            step raised. Errors are reported through the return value rather
            than propagated.
        """
        try:
            current_path = os.path.dirname(os.path.abspath(__file__))
            video_path = os.path.join(current_path, "resource", "videos", f"{video_name}.mp4")

            # 1. Download the video unless a local copy already exists.
            if not os.path.exists(video_path):
                print(f"视频不存在, 开始下载视频: {video_path}")
                download_result = self.download_video(
                    url=youtube_url, resolution="1080p", output_format="mp4", rename=video_name
                )
                video_path = download_result["output_path"]
            else:
                print(f"视频已存在: {video_path}")

            # 2. Reuse an existing script file, otherwise generate a new script.
            script_path = os.path.join(current_path, "resource", "scripts", script_name)
            if os.path.exists(script_path):
                # Context manager closes the handle; the bare open() leaked it.
                with open(script_path, "r", encoding="utf-8") as f:
                    script = json.load(f)
            else:
                print("开始生成脚本...")
                script_result = self.generate_script(
                    video_path=video_path,
                    skip_seconds=skip_seconds,
                    threshold=threshold,
                    vision_batch_size=vision_batch_size,
                    vision_llm_provider=vision_llm_provider,
                )
                script = script_result["script"]

            # 2.2 Persist the script as JSON.
            print("保存脚本到json文件...")
            self.save_script_to_json(script=script, script_path=script_path)

            # 3. Crop the source video into sub-clips.
            print("开始剪辑视频...")
            crop_result = self.crop_video(video_path=video_path, script=script)
            subclip_videos = crop_result["subclip_videos"]

            # 4. Start the final render task.
            print("开始生成最终视频...")
            self.generate_final_video(
                task_id=task_id,
                video_path=video_path,
                script_path=script_path,
                script=script,
                subclip_videos=subclip_videos,
                voice_name=voice_name
            )
            return {
                "status": "等待异步生成视频",
                "path": os.path.join(current_path, "storage", "tasks", task_id)
            }
        except Exception as e:
            return {
                "status": "error",
                "error": str(e)
            }
# Usage example
if __name__ == "__main__":
    # Collect the demo arguments first, then run the pipeline once with them.
    demo_options = {
        "task_id": "test_111901",
        "script_name": "test.json",
        "youtube_url": "https://www.youtube.com/watch?v=vLJ7Yed6FQ4",
        "video_name": "2024-11-19-01",
        "skip_seconds": 50,
        "threshold": 35,
        "vision_batch_size": 10,
        "vision_llm_provider": "gemini",
        "voice_name": "zh-CN-YunjianNeural",
    }
    outcome = VideoPipeline().run_pipeline(**demo_options)
    print(outcome)

View File

@ -3,7 +3,7 @@ import os
import sys
from uuid import uuid4
from app.config import config
from webui.components import basic_settings, video_settings, audio_settings, subtitle_settings, script_settings, review_settings
from webui.components import basic_settings, video_settings, audio_settings, subtitle_settings, script_settings, review_settings, merge_settings, system_settings
from webui.utils import cache, file_utils
from app.utils import utils
from app.models.schema import VideoClipParams, VideoAspect
@ -178,7 +178,9 @@ def main():
# 渲染基础设置面板
basic_settings.render_basic_settings(tr)
# 渲染合并设置
merge_settings.render_merge_settings(tr)
# 渲染主面板
panel = st.columns(3)
with panel[0]:
@ -188,6 +190,8 @@ def main():
audio_settings.render_audio_panel(tr)
with panel[2]:
subtitle_settings.render_subtitle_panel(tr)
# 渲染系统设置面板
system_settings.render_system_panel(tr)
# 渲染视频审查面板
review_settings.render_review_panel(tr)

325
webui.txt
View File

@ -47,3 +47,328 @@ pause
rem set HF_ENDPOINT=https://hf-mirror.com
streamlit run webui.py --browser.serverAddress="127.0.0.1" --server.enableCORS=True --server.maxUploadSize=2048 --browser.gatherUsageStats=False
请求0
curl -X 'POST' \
'http://127.0.0.1:8080/api/v2/youtube/download' \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
"url": "https://www.youtube.com/watch?v=Kenm35gdqtk",
"resolution": "1080p",
"output_format": "mp4",
"rename": "2024-11-19"
}'
{
"url": "https://www.youtube.com/watch?v=Kenm35gdqtk",
"resolution": "1080p",
"output_format": "mp4",
"rename": "2024-11-19"
}
请求1
curl -X 'POST' \
'http://127.0.0.1:8080/api/v2/scripts/generate' \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
"video_path": "E:\\projects\\NarratoAI\\resource\\videos\\test.mp4",
"skip_seconds": 0,
"threshold": 30,
"vision_batch_size": 10,
"vision_llm_provider": "gemini"
}'
{
"video_path": "E:\\projects\\NarratoAI\\resource\\videos\\test.mp4",
"skip_seconds": 0,
"threshold": 30,
"vision_batch_size": 10,
"vision_llm_provider": "gemini"
}
请求2
curl -X 'POST' \
'http://127.0.0.1:8080/api/v2/scripts/crop' \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
"video_origin_path": "E:\\projects\\NarratoAI\\resource\\videos\\test.mp4",
"video_script": [
{
"timestamp": "00:10-01:01",
"picture": "好的,以下是视频画面的客观描述:\n\n视频展现一名留着胡须的男子在森林里挖掘。\n\n画面首先展现男子从后方视角背着军绿色背包穿着卡其色长裤和深色T恤走向一个泥土斜坡。背包上似乎有一个镐头。\n\n下一个镜头特写展现了该背包一个镐头从背包里伸出来包里还有一些其他工具。\n\n然后视频显示该男子用镐头挖掘泥土斜坡。\n\n接下来是一些近景镜头展现男子的靴子在泥土中行走以及男子用手清理泥土。\n\n其他镜头从不同角度展现该男子在挖掘包括从侧面和上方。\n\n可以看到他用工具挖掘清理泥土并检查挖出的土壤。\n\n最后一个镜头展现了挖出的土壤的质地和颜色。",
"narration": "好的,接下来就是我们这位“胡须大侠”的精彩冒险了!只见他背着军绿色的背包,迈着比我上班还不情愿的步伐走向那泥土斜坡。哎呀,这个背包可真是个宝贝,里面藏着一把镐头和一些工具,简直像是个随身携带的“建筑工具箱”! \n\n看他挥舞着镐头挖掘泥土的姿势仿佛在进行一场“挖土大赛”结果却比我做饭还要糟糕。泥土飞扬中他的靴子也成了“泥巴艺术家”。最后那堆色泽各异的土壤就像他心情的写照——五彩斑斓又略显混乱真是一次让人捧腹的建造之旅",
"OST": 2,
"new_timestamp": "00:00-00:51"
},
{
"timestamp": "01:07-01:53",
"picture": "好的,以下是视频画面的客观描述:\n\n视频以一系列森林环境的镜头开头。\n\n第一个镜头是一个特写镜头镜头中显示的是一些带有水滴的绿色叶子。\n\n第二个镜头显示一个留着胡须的男子在森林中挖掘一个洞。 他跪在地上,用工具挖土。\n\n第三个镜头是一个中等镜头显示同一个人坐在他挖好的洞边休息。\n\n第四个镜头显示该洞的内部结构该洞在树根和地面之间。\n\n第五个镜头显示该男子用斧头砍树枝。\n\n第六个镜头显示一堆树枝横跨一个泥泞的小水坑。\n\n第七个镜头显示更多茂盛的树叶和树枝在阳光下。\n\n第八个镜头显示更多茂盛的树叶和树枝。\n\n\n",
"narration": "接下来,我们的“挖土大师”又开始了他的森林探险。看这镜头,水滴在叶子上闪烁,仿佛在说:“快来,快来,这里有故事!”他一边挖洞,一边像个新手厨师试图切洋葱——每一下都小心翼翼,生怕自己不小心挖出个“历史遗址”。坐下休息的时候,脸上的表情就像发现新大陆一样!然后,他拿起斧头砍树枝,简直是现代版的“神雕侠侣”,只不过对象是树木。最后,那堆树枝架过泥泞的小水坑,仿佛在说:“我就是不怕湿脚的勇士!”这就是我们的建造之旅!",
"OST": 2,
"new_timestamp": "00:51-01:37"
}
]
}'
{
"video_origin_path": "E:\\projects\\NarratoAI\\resource\\videos\\test.mp4",
"video_script": [
{
"timestamp": "00:10-01:01",
"picture": "好的,以下是视频画面的客观描述:\n\n视频展现一名留着胡须的男子在森林里挖掘。\n\n画面首先展现男子从后方视角背着军绿色背包穿着卡其色长裤和深色T恤走向一个泥土斜坡。背包上似乎有一个镐头。\n\n下一个镜头特写展现了该背包一个镐头从背包里伸出来包里还有一些其他工具。\n\n然后视频显示该男子用镐头挖掘泥土斜坡。\n\n接下来是一些近景镜头展现男子的靴子在泥土中行走以及男子用手清理泥土。\n\n其他镜头从不同角度展现该男子在挖掘包括从侧面和上方。\n\n可以看到他用工具挖掘清理泥土并检查挖出的土壤。\n\n最后一个镜头展现了挖出的土壤的质地和颜色。",
"narration": "好的,接下来就是我们这位“胡须大侠”的精彩冒险了!只见他背着军绿色的背包,迈着比我上班还不情愿的步伐走向那泥土斜坡。哎呀,这个背包可真是个宝贝,里面藏着一把镐头和一些工具,简直像是个随身携带的“建筑工具箱”! \n\n看他挥舞着镐头挖掘泥土的姿势仿佛在进行一场“挖土大赛”结果却比我做饭还要糟糕。泥土飞扬中他的靴子也成了“泥巴艺术家”。最后那堆色泽各异的土壤就像他心情的写照——五彩斑斓又略显混乱真是一次让人捧腹的建造之旅",
"OST": 2,
"new_timestamp": "00:00-00:51"
},
{
"timestamp": "01:07-01:53",
"picture": "好的,以下是视频画面的客观描述:\n\n视频以一系列森林环境的镜头开头。\n\n第一个镜头是一个特写镜头镜头中显示的是一些带有水滴的绿色叶子。\n\n第二个镜头显示一个留着胡须的男子在森林中挖掘一个洞。 他跪在地上,用工具挖土。\n\n第三个镜头是一个中等镜头显示同一个人坐在他挖好的洞边休息。\n\n第四个镜头显示该洞的内部结构该洞在树根和地面之间。\n\n第五个镜头显示该男子用斧头砍树枝。\n\n第六个镜头显示一堆树枝横跨一个泥泞的小水坑。\n\n第七个镜头显示更多茂盛的树叶和树枝在阳光下。\n\n第八个镜头显示更多茂盛的树叶和树枝。\n\n\n",
"narration": "接下来,我们的“挖土大师”又开始了他的森林探险。看这镜头,水滴在叶子上闪烁,仿佛在说:“快来,快来,这里有故事!”他一边挖洞,一边像个新手厨师试图切洋葱——每一下都小心翼翼,生怕自己不小心挖出个“历史遗址”。坐下休息的时候,脸上的表情就像发现新大陆一样!然后,他拿起斧头砍树枝,简直是现代版的“神雕侠侣”,只不过对象是树木。最后,那堆树枝架过泥泞的小水坑,仿佛在说:“我就是不怕湿脚的勇士!”这就是我们的建造之旅!",
"OST": 2,
"new_timestamp": "00:51-01:37"
}
]
}
请求3
curl -X 'POST' \
'http://127.0.0.1:8080/api/v2/scripts/start-subclip?task_id=12121' \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
"request": {
"video_clip_json": [
{
"timestamp": "00:10-01:01",
"picture": "好的,以下是视频画面的客观描述:\n\n视频展现一名留着胡须的男子在森林里挖掘。\n\n画面首先展现男子从后方视角背着军绿色背包穿着卡其色长裤和深色T恤走向一个泥土斜坡。背包上似乎有一个镐头。\n\n下一个镜头特写展现了该背包一个镐头从背包里伸出来包里还有一些其他工具。\n\n然后视频显示该男子用镐头挖掘泥土斜坡。\n\n接下来是一些近景镜头展现男子的靴子在泥土中行走以及男子用手清理泥土。\n\n其他镜头从不同角度展现该男子在挖掘包括从侧面和上方。\n\n可以看到他用工具挖掘清理泥土并检查挖出的土壤。\n\n最后一个镜头展现了挖出的土壤的质地和颜色。",
"narration": "好的,接下来就是我们这位“胡须大侠”的精彩冒险了!只见他背着军绿色的背包,迈着比我上班还不情愿的步伐走向那泥土斜坡。哎呀,这个背包可真是个宝贝,里面藏着一把镐头和一些工具,简直像是个随身携带的“建筑工具箱”! \n\n看他挥舞着镐头挖掘泥土的姿势仿佛在进行一场“挖土大赛”结果却比我做饭还要糟糕。泥土飞扬中他的靴子也成了“泥巴艺术家”。最后那堆色泽各异的土壤就像他心情的写照——五彩斑斓又略显混乱真是一次让人捧腹的建造之旅",
"OST": 2,
"new_timestamp": "00:00-00:51"
},
{
"timestamp": "01:07-01:53",
"picture": "好的,以下是视频画面的客观描述:\n\n视频以一系列森林环境的镜头开头。\n\n第一个镜头是一个特写镜头镜头中显示的是一些带有水滴的绿色叶子。\n\n第二个镜头显示一个留着胡须的男子在森林中挖掘一个洞。 他跪在地上,用工具挖土。\n\n第三个镜头是一个中等镜头显示同一个人坐在他挖好的洞边休息。\n\n第四个镜头显示该洞的内部结构该洞在树根和地面之间。\n\n第五个镜头显示该男子用斧头砍树枝。\n\n第六个镜头显示一堆树枝横跨一个泥泞的小水坑。\n\n第七个镜头显示更多茂盛的树叶和树枝在阳光下。\n\n第八个镜头显示更多茂盛的树叶和树枝。\n\n\n",
"narration": "接下来,我们的“挖土大师”又开始了他的森林探险。看这镜头,水滴在叶子上闪烁,仿佛在说:“快来,快来,这里有故事!”他一边挖洞,一边像个新手厨师试图切洋葱——每一下都小心翼翼,生怕自己不小心挖出个“历史遗址”。坐下休息的时候,脸上的表情就像发现新大陆一样!然后,他拿起斧头砍树枝,简直是现代版的“神雕侠侣”,只不过对象是树木。最后,那堆树枝架过泥泞的小水坑,仿佛在说:“我就是不怕湿脚的勇士!”这就是我们的建造之旅!",
"OST": 2,
"new_timestamp": "00:51-01:37"
}
],
"video_clip_json_path": "E:\\projects\\NarratoAI\\resource\\scripts\\2024-1118-230421.json",
"video_origin_path": "E:\\projects\\NarratoAI\\resource\\videos\\test.mp4",
"video_aspect": "16:9",
"video_language": "zh-CN",
"voice_name": "zh-CN-YunjianNeural",
"voice_volume": 1,
"voice_rate": 1.2,
"voice_pitch": 1,
"bgm_name": "random",
"bgm_type": "random",
"bgm_file": "",
"bgm_volume": 0.3,
"subtitle_enabled": true,
"subtitle_position": "bottom",
"font_name": "STHeitiMedium.ttc",
"text_fore_color": "#FFFFFF",
"text_background_color": "transparent",
"font_size": 75,
"stroke_color": "#000000",
"stroke_width": 1.5,
"custom_position": 70,
"n_threads": 8
},
"subclip_videos": {
"00:10-01:01": "E:\\projects\\NarratoAI\\storage\\cache_videos/vid-00_10-01_01.mp4",
"01:07-01:53": "E:\\projects\\NarratoAI\\storage\\cache_videos/vid-01_07-01_53.mp4"
}
}'
{
"request": {
"video_clip_json": [
{
"timestamp": "00:10-01:01",
"picture": "好的,以下是视频画面的客观描述:\n\n视频展现一名留着胡须的男子在森林里挖掘。\n\n画面首先展现男子从后方视角背着军绿色背包穿着卡其色长裤和深色T恤走向一个泥土斜坡。背包上似乎有一个镐头。\n\n下一个镜头特写展现了该背包一个镐头从背包里伸出来包里还有一些其他工具。\n\n然后视频显示该男子用镐头挖掘泥土斜坡。\n\n接下来是一些近景镜头展现男子的靴子在泥土中行走以及男子用手清理泥土。\n\n其他镜头从不同角度展现该男子在挖掘包括从侧面和上方。\n\n可以看到他用工具挖掘清理泥土并检查挖出的土壤。\n\n最后一个镜头展现了挖出的土壤的质地和颜色。",
"narration": "好的,接下来就是我们这位“胡须大侠”的精彩冒险了!只见他背着军绿色的背包,迈着比我上班还不情愿的步伐走向那泥土斜坡。哎呀,这个背包可真是个宝贝,里面藏着一把镐头和一些工具,简直像是个随身携带的“建筑工具箱”! \n\n看他挥舞着镐头挖掘泥土的姿势仿佛在进行一场“挖土大赛”结果却比我做饭还要糟糕。泥土飞扬中他的靴子也成了“泥巴艺术家”。最后那堆色泽各异的土壤就像他心情的写照——五彩斑斓又略显混乱真是一次让人捧腹的建造之旅",
"OST": 2,
"new_timestamp": "00:00-00:51"
},
{
"timestamp": "01:07-01:53",
"picture": "好的,以下是视频画面的客观描述:\n\n视频以一系列森林环境的镜头开头。\n\n第一个镜头是一个特写镜头镜头中显示的是一些带有水滴的绿色叶子。\n\n第二个镜头显示一个留着胡须的男子在森林中挖掘一个洞。 他跪在地上,用工具挖土。\n\n第三个镜头是一个中等镜头显示同一个人坐在他挖好的洞边休息。\n\n第四个镜头显示该洞的内部结构该洞在树根和地面之间。\n\n第五个镜头显示该男子用斧头砍树枝。\n\n第六个镜头显示一堆树枝横跨一个泥泞的小水坑。\n\n第七个镜头显示更多茂盛的树叶和树枝在阳光下。\n\n第八个镜头显示更多茂盛的树叶和树枝。\n\n\n",
"narration": "接下来,我们的“挖土大师”又开始了他的森林探险。看这镜头,水滴在叶子上闪烁,仿佛在说:“快来,快来,这里有故事!”他一边挖洞,一边像个新手厨师试图切洋葱——每一下都小心翼翼,生怕自己不小心挖出个“历史遗址”。坐下休息的时候,脸上的表情就像发现新大陆一样!然后,他拿起斧头砍树枝,简直是现代版的“神雕侠侣”,只不过对象是树木。最后,那堆树枝架过泥泞的小水坑,仿佛在说:“我就是不怕湿脚的勇士!”这就是我们的建造之旅!",
"OST": 2,
"new_timestamp": "00:51-01:37"
}
],
"video_clip_json_path": "E:\\projects\\NarratoAI\\resource\\scripts\\2024-1118-230421.json",
"video_origin_path": "E:\\projects\\NarratoAI\\resource\\videos\\test.mp4",
"video_aspect": "16:9",
"video_language": "zh-CN",
"voice_name": "zh-CN-YunjianNeural",
"voice_volume": 1,
"voice_rate": 1.2,
"voice_pitch": 1,
"bgm_name": "random",
"bgm_type": "random",
"bgm_file": "",
"bgm_volume": 0.3,
"subtitle_enabled": true,
"subtitle_position": "bottom",
"font_name": "STHeitiMedium.ttc",
"text_fore_color": "#FFFFFF",
"text_background_color": "transparent",
"font_size": 75,
"stroke_color": "#000000",
"stroke_width": 1.5,
"custom_position": 70,
"n_threads": 8
},
"subclip_videos": {
"00:10-01:01": "E:\\projects\\NarratoAI\\storage\\cache_videos/vid-00_10-01_01.mp4",
"01:07-01:53": "E:\\projects\\NarratoAI\\storage\\cache_videos/vid-01_07-01_53.mp4"
}
}
请在最外层新建一个pipeline 工作流执行逻辑的代码;
他会按照下面的顺序请求接口
1.下载视频
curl -X 'POST' \
'http://127.0.0.1:8080/api/v2/youtube/download' \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
"url": "https://www.youtube.com/watch?v=Kenm35gdqtk",
"resolution": "1080p",
"output_format": "mp4",
"rename": "2024-11-19"
}'
2.生成脚本
curl -X 'POST' \
'http://127.0.0.1:8080/api/v2/scripts/generate' \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
"video_path": "E:\\projects\\NarratoAI\\resource\\videos\\test.mp4",
"skip_seconds": 0,
"threshold": 30,
"vision_batch_size": 10,
"vision_llm_provider": "gemini"
}'
3. 剪辑视频
curl -X 'POST' \
'http://127.0.0.1:8080/api/v2/scripts/crop' \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
"video_origin_path": "E:\\projects\\NarratoAI\\resource\\videos\\test.mp4",
"video_script": [
{
"timestamp": "00:10-01:01",
"picture": "好的,以下是视频画面的客观描述:\n\n视频展现一名留着胡须的男子在森林里挖掘。\n\n画面首先展现男子从后方视角背着军绿色背包穿着卡其色长裤和深色T恤走向一个泥土斜坡。背包上似乎有一个镐头。\n\n下一个镜头特写展现了该背包一个镐头从背包里伸出来包里还有一些其他工具。\n\n然后视频显示该男子用镐头挖掘泥土斜坡。\n\n接下来是一些近景镜头展现男子的靴子在泥土中行走以及男子用手清理泥土。\n\n其他镜头从不同角度展现该男子在挖掘包括从侧面和上方。\n\n可以看到他用工具挖掘清理泥土并检查挖出的土壤。\n\n最后一个镜头展现了挖出的土壤的质地和颜色。",
"narration": "好的,接下来就是我们这位“胡须大侠”的精彩冒险了!只见他背着军绿色的背包,迈着比我上班还不情愿的步伐走向那泥土斜坡。哎呀,这个背包可真是个宝贝,里面藏着一把镐头和一些工具,简直像是个随身携带的“建筑工具箱”! \n\n看他挥舞着镐头挖掘泥土的姿势仿佛在进行一场“挖土大赛”结果却比我做饭还要糟糕。泥土飞扬中他的靴子也成了“泥巴艺术家”。最后那堆色泽各异的土壤就像他心情的写照——五彩斑斓又略显混乱真是一次让人捧腹的建造之旅",
"OST": 2,
"new_timestamp": "00:00-00:51"
},
{
"timestamp": "01:07-01:53",
"picture": "好的,以下是视频画面的客观描述:\n\n视频以一系列森林环境的镜头开头。\n\n第一个镜头是一个特写镜头镜头中显示的是一些带有水滴的绿色叶子。\n\n第二个镜头显示一个留着胡须的男子在森林中挖掘一个洞。 他跪在地上,用工具挖土。\n\n第三个镜头是一个中等镜头显示同一个人坐在他挖好的洞边休息。\n\n第四个镜头显示该洞的内部结构该洞在树根和地面之间。\n\n第五个镜头显示该男子用斧头砍树枝。\n\n第六个镜头显示一堆树枝横跨一个泥泞的小水坑。\n\n第七个镜头显示更多茂盛的树叶和树枝在阳光下。\n\n第八个镜头显示更多茂盛的树叶和树枝。\n\n\n",
"narration": "接下来,我们的“挖土大师”又开始了他的森林探险。看这镜头,水滴在叶子上闪烁,仿佛在说:“快来,快来,这里有故事!”他一边挖洞,一边像个新手厨师试图切洋葱——每一下都小心翼翼,生怕自己不小心挖出个“历史遗址”。坐下休息的时候,脸上的表情就像发现新大陆一样!然后,他拿起斧头砍树枝,简直是现代版的“神雕侠侣”,只不过对象是树木。最后,那堆树枝架过泥泞的小水坑,仿佛在说:“我就是不怕湿脚的勇士!”这就是我们的建造之旅!",
"OST": 2,
"new_timestamp": "00:51-01:37"
}
]
}'
4.生成视频
curl -X 'POST' \
'http://127.0.0.1:8080/api/v2/scripts/start-subclip?task_id=12121' \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
"request": {
"video_clip_json": [
{
"timestamp": "00:10-01:01",
"picture": "好的,以下是视频画面的客观描述:\n\n视频展现一名留着胡须的男子在森林里挖掘。\n\n画面首先展现男子从后方视角背着军绿色背包穿着卡其色长裤和深色T恤走向一个泥土斜坡。背包上似乎有一个镐头。\n\n下一个镜头特写展现了该背包一个镐头从背包里伸出来包里还有一些其他工具。\n\n然后视频显示该男子用镐头挖掘泥土斜坡。\n\n接下来是一些近景镜头展现男子的靴子在泥土中行走以及男子用手清理泥土。\n\n其他镜头从不同角度展现该男子在挖掘包括从侧面和上方。\n\n可以看到他用工具挖掘清理泥土并检查挖出的土壤。\n\n最后一个镜头展现了挖出的土壤的质地和颜色。",
"narration": "好的,接下来就是我们这位“胡须大侠”的精彩冒险了!只见他背着军绿色的背包,迈着比我上班还不情愿的步伐走向那泥土斜坡。哎呀,这个背包可真是个宝贝,里面藏着一把镐头和一些工具,简直像是个随身携带的“建筑工具箱”! \n\n看他挥舞着镐头挖掘泥土的姿势仿佛在进行一场“挖土大赛”结果却比我做饭还要糟糕。泥土飞扬中他的靴子也成了“泥巴艺术家”。最后那堆色泽各异的土壤就像他心情的写照——五彩斑斓又略显混乱真是一次让人捧腹的建造之旅",
"OST": 2,
"new_timestamp": "00:00-00:51"
},
{
"timestamp": "01:07-01:53",
"picture": "好的,以下是视频画面的客观描述:\n\n视频以一系列森林环境的镜头开头。\n\n第一个镜头是一个特写镜头镜头中显示的是一些带有水滴的绿色叶子。\n\n第二个镜头显示一个留着胡须的男子在森林中挖掘一个洞。 他跪在地上,用工具挖土。\n\n第三个镜头是一个中等镜头显示同一个人坐在他挖好的洞边休息。\n\n第四个镜头显示该洞的内部结构该洞在树根和地面之间。\n\n第五个镜头显示该男子用斧头砍树枝。\n\n第六个镜头显示一堆树枝横跨一个泥泞的小水坑。\n\n第七个镜头显示更多茂盛的树叶和树枝在阳光下。\n\n第八个镜头显示更多茂盛的树叶和树枝。\n\n\n",
"narration": "接下来,我们的“挖土大师”又开始了他的森林探险。看这镜头,水滴在叶子上闪烁,仿佛在说:“快来,快来,这里有故事!”他一边挖洞,一边像个新手厨师试图切洋葱——每一下都小心翼翼,生怕自己不小心挖出个“历史遗址”。坐下休息的时候,脸上的表情就像发现新大陆一样!然后,他拿起斧头砍树枝,简直是现代版的“神雕侠侣”,只不过对象是树木。最后,那堆树枝架过泥泞的小水坑,仿佛在说:“我就是不怕湿脚的勇士!”这就是我们的建造之旅!",
"OST": 2,
"new_timestamp": "00:51-01:37"
}
],
"video_clip_json_path": "E:\\projects\\NarratoAI\\resource\\scripts\\2024-1118-230421.json",
"video_origin_path": "E:\\projects\\NarratoAI\\resource\\videos\\test.mp4",
"video_aspect": "16:9",
"video_language": "zh-CN",
"voice_name": "zh-CN-YunjianNeural",
"voice_volume": 1,
"voice_rate": 1.2,
"voice_pitch": 1,
"bgm_name": "random",
"bgm_type": "random",
"bgm_file": "",
"bgm_volume": 0.3,
"subtitle_enabled": true,
"subtitle_position": "bottom",
"font_name": "STHeitiMedium.ttc",
"text_fore_color": "#FFFFFF",
"text_background_color": "transparent",
"font_size": 75,
"stroke_color": "#000000",
"stroke_width": 1.5,
"custom_position": 70,
"n_threads": 8
},
"subclip_videos": {
"00:10-01:01": "E:\\projects\\NarratoAI\\storage\\cache_videos/vid-00_10-01_01.mp4",
"01:07-01:53": "E:\\projects\\NarratoAI\\storage\\cache_videos/vid-01_07-01_53.mp4"
}
}'
请求1返回的参数是
{
"task_id": "4e9b575f-68c0-4ae1-b218-db42b67993d0",
"output_path": "E:\\projects\\NarratoAI\\resource\\videos\\2024-11-19.mp4",
"resolution": "1080p",
"format": "mp4",
"filename": "2024-11-19.mp4"
}
output_path需要传递给请求2
请求2返回数据为
{
"task_id": "04497017-953c-44b4-bf1d-9d8ed3ebbbce",
"script": [
{
"timestamp": "00:10-01:01",
"picture": "好的,以下是對影片畫面的客觀描述:\n\n影片顯示一名留著鬍鬚的男子在一處樹林茂密的斜坡上挖掘。\n\n畫面一男子從後方出現背著一個軍綠色的背包背包裡似乎裝有工具。他穿著卡其色的長褲和深色的登山鞋。\n\n畫面二特寫鏡頭顯示男子的背包一個舊的鎬頭從包裡露出來包裡還有其他工具包括一個鏟子。\n\n畫面三男子用鎬頭在斜坡上挖土背包放在他旁邊。\n\n畫面四特寫鏡頭顯示男子的登山鞋在泥土中。\n\n畫面五男子坐在斜坡上用手清理樹根和泥土。\n\n畫面六地上有一些鬆動的泥土和落葉。\n\n畫面七男子的背包近景鏡頭他正在挖掘。\n\n畫面八男子在斜坡上挖掘揚起一陣塵土。\n\n畫面九特寫鏡頭顯示男子用手清理泥土。\n\n畫面十特寫鏡頭顯示挖出的泥土剖面可以看到土壤的層次。",
"narration": "上一个画面是我在绝美的自然中,准备开启我的“土豪”挖掘之旅。现在,你们看到这位留着胡子的“大哥”,他背着个军绿色的包,里面装的可不仅仅是工具,还有我对生活的无限热爱(以及一丝不安)。看!这把旧镐头就像我的前任——用起来费劲,但又舍不得扔掉。\n\n他在斜坡上挖土泥土飞扬仿佛在跟大地进行一场“泥巴大战”。每一铲下去都能听到大地微微的呻吟哎呀我这颗小树根可比我当年的情感纠葛还难处理呢别担心这些泥土层次分明简直可以开个“泥土博物馆”。所以朋友们跟着我一起享受这场泥泞中的乐趣吧",
"OST": 2,
"new_timestamp": "00:00-00:51"
},
{
"timestamp": "01:07-01:53",
"picture": "好的,以下是對影片畫面內容的客觀描述:\n\n影片以一系列森林環境的鏡頭開始。第一個鏡頭展示了綠葉植物的特寫鏡頭葉子上有一些水珠。接下來的鏡頭是一個男人在森林裡挖掘一個小坑他跪在地上用鏟子挖土。\n\n接下來的鏡頭是同一個男人坐在他挖的坑旁邊望著前方。然後鏡頭顯示該坑的廣角鏡頭顯示其結構和大小。\n\n之後的鏡頭同一個男人在樹林裡劈柴。鏡頭最後呈現出一潭渾濁的水周圍環繞著樹枝。然後鏡頭又回到了森林裡生長茂盛的植物特寫鏡頭。",
"narration": "好嘞,朋友们,我们已经在泥土博物馆里捣鼓了一阵子,现在是时候跟大自然亲密接触了!看看这片森林,绿叶上水珠闪闪发光,就像我曾经的爱情,虽然短暂,却美得让人心碎。\n\n现在我在这里挖个小坑感觉自己就像是一位新晋“挖土大王”不过说实话这手艺真不敢恭维连铲子都快对我崩溃了。再说劈柴这动作简直比我前任的情绪波动还要激烈最后这一潭浑浊的水别担心它只是告诉我生活就像这水总有些杂质但也别忘了要勇敢面对哦",
"OST": 2,
"new_timestamp": "00:51-01:37"
}
]
}
output_path和script参数需要传递给请求3
请求3返回参数是
{
"task_id": "b6f5a98a-b2e0-4e3d-89c5-64fb90db2ec1",
"subclip_videos": {
"00:10-01:01": "E:\\projects\\NarratoAI\\storage\\cache_videos/vid-00_10-01_01.mp4",
"01:07-01:53": "E:\\projects\\NarratoAI\\storage\\cache_videos/vid-01_07-01_53.mp4"
}
}
subclip_videos和 output_path和script参数需要传递给请求4
最后完成工作流
0 代表只播放文案音频,禁用视频原声;1 代表只播放视频原声,不需要播放文案音频和字幕;2 代表既播放文案音频,也要播放视频原声

View File

@ -20,7 +20,7 @@ def render_audio_panel(tr):
def render_tts_settings(tr):
"""渲染TTS(文本转语音)设置"""
# 获取支持的语音列表
support_locales = ["zh-CN", "zh-HK", "zh-TW", "en-US"]
support_locales = ["zh-CN"]
voices = voice.get_all_azure_voices(filter_locals=support_locales)
# 创建友好的显示名称

View File

@ -52,18 +52,34 @@ def render_language_settings(tr):
def render_proxy_settings(tr):
"""渲染代理设置"""
proxy_url_http = config.proxy.get("http", "") or os.getenv("VPN_PROXY_URL", "")
proxy_url_https = config.proxy.get("https", "") or os.getenv("VPN_PROXY_URL", "")
# 获取当前代理状态
proxy_enabled = config.proxy.get("enabled", True)
proxy_url_http = config.proxy.get("http")
proxy_url_https = config.proxy.get("https")
HTTP_PROXY = st.text_input(tr("HTTP_PROXY"), value=proxy_url_http)
HTTPS_PROXY = st.text_input(tr("HTTPs_PROXY"), value=proxy_url_https)
# 添加代理开关
proxy_enabled = st.checkbox(tr("Enable Proxy"), value=proxy_enabled)
# 保存代理开关状态
config.proxy["enabled"] = proxy_enabled
if HTTP_PROXY:
config.proxy["http"] = HTTP_PROXY
os.environ["HTTP_PROXY"] = HTTP_PROXY
if HTTPS_PROXY:
config.proxy["https"] = HTTPS_PROXY
os.environ["HTTPS_PROXY"] = HTTPS_PROXY
# 只有在代理启用时才显示代理设置输入框
if proxy_enabled:
HTTP_PROXY = st.text_input(tr("HTTP_PROXY"), value=proxy_url_http)
HTTPS_PROXY = st.text_input(tr("HTTPs_PROXY"), value=proxy_url_https)
if HTTP_PROXY:
config.proxy["http"] = HTTP_PROXY
os.environ["HTTP_PROXY"] = HTTP_PROXY
if HTTPS_PROXY:
config.proxy["https"] = HTTPS_PROXY
os.environ["HTTPS_PROXY"] = HTTPS_PROXY
else:
# 当代理被禁用时,清除环境变量和配置
os.environ.pop("HTTP_PROXY", None)
os.environ.pop("HTTPS_PROXY", None)
config.proxy["http"] = ""
config.proxy["https"] = ""
def test_vision_model_connection(api_key, base_url, model_name, provider, tr):
@ -90,6 +106,28 @@ def test_vision_model_connection(api_key, base_url, model_name, provider, tr):
except Exception as e:
return False, f"{tr('gemini model is not available')}: {str(e)}"
elif provider.lower() == 'qwenvl':
from openai import OpenAI
try:
client = OpenAI(
api_key=api_key,
base_url=base_url or "https://dashscope.aliyuncs.com/compatible-mode/v1"
)
# 发送一个简单的测试请求
response = client.chat.completions.create(
model=model_name or "qwen-vl-max-latest",
messages=[{"role": "user", "content": "直接回复我文本'当前网络可用'"}]
)
if response and response.choices:
return True, tr("QwenVL model is available")
else:
return False, tr("QwenVL model returned invalid response")
except Exception as e:
return False, f"{tr('QwenVL model is not available')}: {str(e)}"
elif provider.lower() == 'narratoapi':
import requests
try:
@ -116,7 +154,7 @@ def render_vision_llm_settings(tr):
st.subheader(tr("Vision Model Settings"))
# 视频分析模型提供商选择
vision_providers = ['Gemini', 'NarratoAPI(待发布)', 'QwenVL(待发布)']
vision_providers = ['Gemini', 'QwenVL', 'NarratoAPI(待发布)']
saved_vision_provider = config.app.get("vision_llm_provider", "Gemini").lower()
saved_provider_index = 0
@ -142,18 +180,33 @@ def render_vision_llm_settings(tr):
# 渲染视觉模型配置输入框
st_vision_api_key = st.text_input(tr("Vision API Key"), value=vision_api_key, type="password")
# 当选择 Gemini 时禁用 base_url 输入
if vision_provider.lower() == 'gemini':
# 根据不同提供商设置默认值和帮助信息
if vision_provider == 'gemini':
st_vision_base_url = st.text_input(
tr("Vision Base URL"),
value=vision_base_url,
disabled=True,
help=tr("Gemini API does not require a base URL")
)
st_vision_model_name = st.text_input(
tr("Vision Model Name"),
value=vision_model_name or "gemini-1.5-flash",
help=tr("Default: gemini-1.5-flash")
)
elif vision_provider == 'qwenvl':
st_vision_base_url = st.text_input(
tr("Vision Base URL"),
value=vision_base_url or "https://dashscope.aliyuncs.com/compatible-mode/v1",
help=tr("Default: https://dashscope.aliyuncs.com/compatible-mode/v1")
)
st_vision_model_name = st.text_input(
tr("Vision Model Name"),
value=vision_model_name or "qwen-vl-max-latest",
help=tr("Default: qwen-vl-max-latest")
)
else:
st_vision_base_url = st.text_input(tr("Vision Base URL"), value=vision_base_url)
st_vision_model_name = st.text_input(tr("Vision Model Name"), value=vision_model_name)
st_vision_model_name = st.text_input(tr("Vision Model Name"), value=vision_model_name)
# 在配置输入框后添加测试按钮
if st.button(tr("Test Connection"), key="test_vision_connection"):
@ -174,7 +227,7 @@ def render_vision_llm_settings(tr):
# 保存视觉模型配置
if st_vision_api_key:
config.app[f"vision_{vision_provider}_api_key"] = st_vision_api_key
st.session_state[f"vision_{vision_provider}_api_key"] = st_vision_api_key # 用于script_settings.py
st.session_state[f"vision_{vision_provider}_api_key"] = st_vision_api_key
if st_vision_base_url:
config.app[f"vision_{vision_provider}_base_url"] = st_vision_base_url
st.session_state[f"vision_{vision_provider}_base_url"] = st_vision_base_url
@ -182,81 +235,6 @@ def render_vision_llm_settings(tr):
config.app[f"vision_{vision_provider}_model_name"] = st_vision_model_name
st.session_state[f"vision_{vision_provider}_model_name"] = st_vision_model_name
# # NarratoAPI 特殊配置
# if vision_provider == 'narratoapi':
# st.subheader(tr("Narrato Additional Settings"))
#
# # Narrato API 基础配置
# narrato_api_key = st.text_input(
# tr("Narrato API Key"),
# value=config.app.get("narrato_api_key", ""),
# type="password",
# help="用于访问 Narrato API 的密钥"
# )
# if narrato_api_key:
# config.app["narrato_api_key"] = narrato_api_key
# st.session_state['narrato_api_key'] = narrato_api_key
#
# narrato_api_url = st.text_input(
# tr("Narrato API URL"),
# value=config.app.get("narrato_api_url", "http://127.0.0.1:8000/api/v1/video/analyze")
# )
# if narrato_api_url:
# config.app["narrato_api_url"] = narrato_api_url
# st.session_state['narrato_api_url'] = narrato_api_url
#
# # 视频分析模型配置
# st.markdown("##### " + tr("Vision Model Settings"))
# narrato_vision_model = st.text_input(
# tr("Vision Model Name"),
# value=config.app.get("narrato_vision_model", "gemini-1.5-flash")
# )
# narrato_vision_key = st.text_input(
# tr("Vision Model API Key"),
# value=config.app.get("narrato_vision_key", ""),
# type="password",
# help="用于视频分析的模 API Key"
# )
#
# if narrato_vision_model:
# config.app["narrato_vision_model"] = narrato_vision_model
# st.session_state['narrato_vision_model'] = narrato_vision_model
# if narrato_vision_key:
# config.app["narrato_vision_key"] = narrato_vision_key
# st.session_state['narrato_vision_key'] = narrato_vision_key
#
# # 文案生成模型配置
# st.markdown("##### " + tr("Text Generation Model Settings"))
# narrato_llm_model = st.text_input(
# tr("LLM Model Name"),
# value=config.app.get("narrato_llm_model", "qwen-plus")
# )
# narrato_llm_key = st.text_input(
# tr("LLM Model API Key"),
# value=config.app.get("narrato_llm_key", ""),
# type="password",
# help="用于文案生成的模型 API Key"
# )
#
# if narrato_llm_model:
# config.app["narrato_llm_model"] = narrato_llm_model
# st.session_state['narrato_llm_model'] = narrato_llm_model
# if narrato_llm_key:
# config.app["narrato_llm_key"] = narrato_llm_key
# st.session_state['narrato_llm_key'] = narrato_llm_key
#
# # 批处理配置
# narrato_batch_size = st.number_input(
# tr("Batch Size"),
# min_value=1,
# max_value=50,
# value=config.app.get("narrato_batch_size", 10),
# help="每批处理的图片数量"
# )
# if narrato_batch_size:
# config.app["narrato_batch_size"] = narrato_batch_size
# st.session_state['narrato_batch_size'] = narrato_batch_size
def test_text_model_connection(api_key, base_url, model_name, provider, tr):
"""测试文本模型连接
@ -328,6 +306,7 @@ def test_text_model_connection(api_key, base_url, model_name, provider, tr):
except Exception as e:
return False, f"{tr('Connection failed')}: {str(e)}"
def render_text_llm_settings(tr):
"""渲染文案生成模型设置"""
st.subheader(tr("Text Generation Model Settings"))

View File

@ -0,0 +1,303 @@
import os
import time
import math
import sys
import tempfile
import traceback
import shutil
import streamlit as st
from loguru import logger
from typing import List, Dict, Tuple
from dataclasses import dataclass
from streamlit.runtime.uploaded_file_manager import UploadedFile
from webui.utils.merge_video import merge_videos_and_subtitles
from app.utils.utils import video_dir, srt_dir
from app.services.subtitle import extract_audio_and_create_subtitle
# Relative path of the scratch directory that holds files uploaded for merging
TEMP_MERGE_DIR = os.path.join("storage", "temp", "merge")
# Create the scratch directory at import time so later writes find it in place
os.makedirs(TEMP_MERGE_DIR, exist_ok=True)
@dataclass
class VideoSubtitlePair:
    """One video upload and its subtitle, paired by a shared base name."""
    # Uploaded video object, or None when no video has been attached
    video_file: UploadedFile | None
    # Subtitle file path, or None when no subtitle has been attached
    subtitle_file: str | None
    # Key shared by the video and the subtitle
    base_name: str
    # Sort position of this pair; defaults to 0
    order: int = 0
def save_uploaded_file(uploaded_file: "UploadedFile", target_dir: str) -> str:
    """Write an uploaded file into ``target_dir`` and return the saved path.

    Args:
        uploaded_file: Object exposing ``name`` and ``getvalue()`` (the file's
            bytes), as Streamlit uploads do.
        target_dir: Directory to write into; created if it does not exist.

    Returns:
        Path of the written file inside ``target_dir``.

    Raises:
        ValueError: If the upload's name contains no file name component.
    """
    # The name comes from the client, so keep only its final component to
    # stop names such as "../x.srt" from escaping target_dir.
    safe_name = os.path.basename(uploaded_file.name)
    if not safe_name:
        raise ValueError(f"invalid upload file name: {uploaded_file.name!r}")

    os.makedirs(target_dir, exist_ok=True)
    file_path = os.path.join(target_dir, safe_name)

    # Remove any previous copy first so the new upload fully replaces it.
    if os.path.exists(file_path):
        os.remove(file_path)

    with open(file_path, "wb") as f:
        f.write(uploaded_file.getvalue())
    return file_path
def clean_temp_dir():
    """Delete every regular file that sits directly in the temporary merge directory.

    Sub-directories are left alone. A failure on one entry is logged and the
    remaining entries are still processed.
    """
    if not os.path.exists(TEMP_MERGE_DIR):
        return

    for entry_name in os.listdir(TEMP_MERGE_DIR):
        candidate = os.path.join(TEMP_MERGE_DIR, entry_name)
        try:
            if not os.path.isfile(candidate):
                continue
            os.unlink(candidate)
        except Exception as e:
            logger.error(f"清理临时文件失败: {str(e)}")
def group_files(files: List[UploadedFile]) -> Dict[str, VideoSubtitlePair]:
    """Group uploaded files into video/subtitle pairs keyed by base name.

    Every ``.mp4`` and ``.srt`` upload (extension compared case-insensitively)
    is saved into ``TEMP_MERGE_DIR``. Pairs are created only for videos; a
    subtitle without a matching video is saved but not returned.

    Args:
        files: Uploaded files to group.

    Returns:
        Mapping of base name to its pair. ``order`` reflects the position at
        which each video's base name was first seen.
    """
    pairs: Dict[str, VideoSubtitlePair] = {}

    # Pass 1: videos. They define which pairs exist and in what order.
    for file in files:
        base_name, ext = os.path.splitext(file.name)
        if ext.lower() != ".mp4":
            continue
        if base_name not in pairs:
            # len(pairs) is the running count of pairs created so far.
            pairs[base_name] = VideoSubtitlePair(None, None, base_name, len(pairs))
        pairs[base_name].video_file = file
        save_uploaded_file(file, TEMP_MERGE_DIR)

    # Pass 2: subtitles. Saved even when no matching video exists.
    for file in files:
        base_name, ext = os.path.splitext(file.name)
        if ext.lower() != ".srt":
            continue
        # Record the path actually written rather than rebuilding
        # "<base>.srt": an upload named "X.SRT" is stored under that exact
        # name, so a rebuilt lower-case path would not exist on a
        # case-sensitive filesystem.
        subtitle_path = save_uploaded_file(file, TEMP_MERGE_DIR)
        if base_name in pairs:
            pairs[base_name].subtitle_file = subtitle_path

    return pairs
def render_merge_settings(tr):
"""Render the "video + subtitle merge" section of the web UI.

Inside an expander this function:
- offers a multi-file uploader restricted to mp4 and srt files and groups
the uploads with group_files();
- keeps a per-file sort value in st.session_state.file_orders and a
st.session_state.needs_reorder flag that triggers st.rerun();
- previews the files five per row (video, subtitle text, order input) and
offers a transcription button that calls extract_audio_and_create_subtitle();
- merges every base name that has both an .mp4 and an .srt in TEMP_MERGE_DIR
through merge_videos_and_subtitles(), writing into video_dir() / srt_dir(),
and calls clean_temp_dir() after a successful merge.

Args:
tr: callable that receives a message key and returns the text to display.
"""
with st.expander(tr("Video Subtitle Merge"), expanded=False):
# File upload area
uploaded_files = st.file_uploader(
tr("Upload Video and Subtitle Files"),
type=["mp4", "srt"],
accept_multiple_files=True,
key="merge_files"
)
if uploaded_files:
all_pairs = group_files(uploaded_files)
if all_pairs:
st.write(tr("All Uploaded Files"))
# Initialise the ordering information kept in session state
if 'file_orders' not in st.session_state:
st.session_state.file_orders = {
name: pair.order for name, pair in all_pairs.items()
}
st.session_state.needs_reorder = False
# Make sure every newly uploaded file has an order value
for name, pair in all_pairs.items():
if name not in st.session_state.file_orders:
st.session_state.file_orders[name] = pair.order
# Drop the order values of files that are no longer uploaded
st.session_state.file_orders = {
k: v for k, v in st.session_state.file_orders.items()
if k in all_pairs
}
# Sort the pairs by their order value
sorted_pairs = sorted(
all_pairs.items(),
key=lambda x: st.session_state.file_orders[x[0]]
)
# Work out how many rows are needed to show all videos, 5 per row
num_pairs = len(sorted_pairs)
num_rows = (num_pairs + 4) // 5 # ceiling division, 5 per row
# Walk through each row
for row in range(num_rows):
# Create 5 columns
cols = st.columns(5)
# Fill this row with up to 5 videos
for col_idx in range(5):
pair_idx = row * 5 + col_idx
if pair_idx < num_pairs:
base_name, pair = sorted_pairs[pair_idx]
with cols[col_idx]:
st.caption(base_name)
# Show the video preview (if the file exists)
video_path = os.path.join(TEMP_MERGE_DIR, f"{base_name}.mp4")
if os.path.exists(video_path):
st.video(video_path)
else:
st.warning(tr("Missing Video"))
# Show the subtitle preview (if the file exists)
subtitle_path = os.path.join(TEMP_MERGE_DIR, f"{base_name}.srt")
if os.path.exists(subtitle_path):
with open(subtitle_path, 'r', encoding='utf-8') as f:
subtitle_content = f.read()
st.markdown(tr("Subtitle Preview"))
st.text_area(
"Subtitle Content",
value=subtitle_content,
height=100, # reduced height to fit the 5-column layout
label_visibility="collapsed",
key=f"subtitle_preview_{base_name}"
)
else:
st.warning(tr("Missing Subtitle"))
# A video without a subtitle gets a one-click transcription button
if os.path.exists(video_path):
if st.button(tr("One-Click Transcribe"), key=f"transcribe_{base_name}"):
with st.spinner(tr("Transcribing...")):
try:
# Generate the subtitle file
result = extract_audio_and_create_subtitle(video_path, subtitle_path)
if result:
# Read the generated subtitle file and show a preview
with open(subtitle_path, 'r', encoding='utf-8') as f:
subtitle_content = f.read()
st.markdown(tr("Subtitle Preview"))
st.text_area(
"Subtitle Content",
value=subtitle_content,
height=150,
label_visibility="collapsed",
key=f"subtitle_preview_transcribed_{base_name}"
)
st.success(tr("Transcription Complete!"))
# Record the subtitle path on the pair
pair.subtitle_file = subtitle_path
else:
st.error(tr("Transcription Failed. Please try again."))
except Exception as e:
# The failure is classified by substrings of the exception message
error_message = str(e)
logger.error(traceback.format_exc())
if "rate limit exceeded" in error_message.lower():
st.error(tr("API rate limit exceeded. Please wait about an hour and try again."))
elif "resource_exhausted" in error_message.lower():
st.error(tr("Resources exhausted. Please try again later."))
else:
st.error(f"{tr('Transcription Failed')}: {str(e)}")
# Order input box; a change flags the page for a re-sort
order = st.number_input(
tr("Order"),
min_value=0,
value=st.session_state.file_orders[base_name],
key=f"order_{base_name}",
on_change=lambda: setattr(st.session_state, 'needs_reorder', True)
)
if order != st.session_state.file_orders[base_name]:
st.session_state.file_orders[base_name] = order
st.session_state.needs_reorder = True
# Reload the page when a re-sort was requested
if st.session_state.needs_reorder:
st.session_state.needs_reorder = False
st.rerun()
# Keep only the base names that have both a video and a subtitle on disk
complete_pairs = {
k: v for k, v in all_pairs.items()
if os.path.exists(os.path.join(TEMP_MERGE_DIR, f"{k}.mp4")) and
os.path.exists(os.path.join(TEMP_MERGE_DIR, f"{k}.srt"))
}
# Merge button and result display
cols = st.columns([1, 2, 1])
with cols[0]:
st.write(f"{tr('Mergeable Files')}: {len(complete_pairs)}")
merge_videos_result = None
with cols[1]:
# NOTE(review): the button is not disabled when complete_pairs is empty — confirm
# merge_videos_and_subtitles() copes with empty lists.
if st.button(tr("Merge All Files"), type="primary", use_container_width=True):
try:
# Complete pairs, sorted by their order value
sorted_complete_pairs = sorted(
[(k, v) for k, v in complete_pairs.items()],
key=lambda x: st.session_state.file_orders[x[0]]
)
video_paths = []
subtitle_paths = []
for base_name, _ in sorted_complete_pairs:
video_paths.append(os.path.join(TEMP_MERGE_DIR, f"{base_name}.mp4"))
subtitle_paths.append(os.path.join(TEMP_MERGE_DIR, f"{base_name}.srt"))
# Build the output file paths
# NOTE(review): '%M%S' encodes only minute and second, so names repeat every hour,
# and the two strftime calls can straddle a second boundary and yield different
# suffixes for the video and its subtitle — consider one full timestamp.
output_video = os.path.join(video_dir(), f"merged_video_{time.strftime('%M%S')}.mp4")
output_subtitle = os.path.join(srt_dir(), f"merged_subtitle_{time.strftime('%M%S')}.srt")
with st.spinner(tr("Merging files...")):
# Merge the files
merge_videos_and_subtitles(
video_paths,
subtitle_paths,
output_video,
output_subtitle
)
success = True
error_msg = ""
# Check that both output files were actually produced
if not os.path.exists(output_video):
success = False
error_msg += tr("Failed to generate merged video. ")
if not os.path.exists(output_subtitle):
success = False
error_msg += tr("Failed to generate merged subtitle. ")
if success:
# Show the success message
st.success(tr("Merge completed!"))
merge_videos_result = (output_video, output_subtitle)
# Clean up the temporary directory
clean_temp_dir()
else:
st.error(error_msg)
except Exception as e:
# The failure is classified by substrings of the exception message
error_message = str(e)
if "moviepy" in error_message.lower():
st.error(tr("Error processing video files. Please check if the videos are valid MP4 files."))
elif "pysrt" in error_message.lower():
st.error(tr("Error processing subtitle files. Please check if the subtitles are valid SRT files."))
else:
st.error(f"{tr('Error during merge')}: {error_message}")
# The merge result preview goes below the merge button
if merge_videos_result:
st.markdown(f"<h3 style='text-align: center'>{tr('Merge Result Preview')}</h3>", unsafe_allow_html=True)
# Use a column layout to centre the video
col1, col2, col3 = st.columns([1,2,1])
with col2:
st.video(merge_videos_result[0])
st.code(f"{tr('Video Path')}: {merge_videos_result[0]}")
st.code(f"{tr('Subtitle Path')}: {merge_videos_result[1]}")
else:
st.warning(tr("No Files Found"))

View File

@ -1,86 +1,15 @@
import os
import ssl
import glob
import json
import time
import asyncio
import traceback
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
import requests
import streamlit as st
from loguru import logger
from app.config import config
from app.models.schema import VideoClipParams
from app.utils.script_generator import ScriptProcessor
from app.utils import utils, check_script, vision_analyzer, video_processor, video_processor_v2
from webui.utils import file_utils
from app.utils import utils, check_script
from webui.tools.generate_script_docu import generate_script_docu
def get_batch_timestamps(batch_files, prev_batch_files=None):
"""
获取一批文件的时间戳范围
返回: (first_timestamp, last_timestamp, timestamp_range)
文件名格式: keyframe_001253_000050.jpg
其中 000050 表示 00:00:50 (50)
000101 表示 00:01:01 (1分1秒)
Args:
batch_files: 当前批次的文件列表
prev_batch_files: 上一个批次的文件列表用于处理单张图片的情况
"""
if not batch_files:
logger.warning("Empty batch files")
return "00:00", "00:00", "00:00-00:00"
# 如果当前批次只有一张图片,且有上一个批次的文件,则使用上一批次的最后一张作为首帧
if len(batch_files) == 1 and prev_batch_files and len(prev_batch_files) > 0:
first_frame = os.path.basename(prev_batch_files[-1])
last_frame = os.path.basename(batch_files[0])
logger.debug(f"单张图片批次,使用上一批次最后一帧作为首帧: {first_frame}")
else:
# 提取首帧和尾帧的时间戳
first_frame = os.path.basename(batch_files[0])
last_frame = os.path.basename(batch_files[-1])
# 从文件名中提取时间信息
first_time = first_frame.split('_')[2].replace('.jpg', '') # 000050
last_time = last_frame.split('_')[2].replace('.jpg', '') # 000101
# 转换为分:秒格式
def format_timestamp(time_str):
# 时间格式为 MMSS如 0050 表示 00:50, 0101 表示 01:01
if len(time_str) < 4:
logger.warning(f"Invalid timestamp format: {time_str}")
return "00:00"
minutes = int(time_str[-4:-2]) # 取后4位的前2位作为分钟
seconds = int(time_str[-2:]) # 取后2位作为秒数
# 处理进位
if seconds >= 60:
minutes += seconds // 60
seconds = seconds % 60
return f"{minutes:02d}:{seconds:02d}"
first_timestamp = format_timestamp(first_time)
last_timestamp = format_timestamp(last_time)
timestamp_range = f"{first_timestamp}-{last_timestamp}"
logger.debug(f"解析时间戳: {first_frame} -> {first_timestamp}, {last_frame} -> {last_timestamp}")
return first_timestamp, last_timestamp, timestamp_range
def get_batch_files(keyframe_files, result, batch_size=5):
"""
获取当前批次的图片文件
"""
batch_start = result['batch_index'] * batch_size
batch_end = min(batch_start + batch_size, len(keyframe_files))
return keyframe_files[batch_start:batch_end]
def render_script_panel(tr):
"""渲染脚本配置面板"""
with st.container(border=True):
@ -102,7 +31,11 @@ def render_script_panel(tr):
def render_script_file(tr, params):
"""渲染脚本文件选择"""
script_list = [(tr("None"), ""), (tr("Auto Generate"), "auto")]
script_list = [
(tr("None"), ""),
(tr("Auto Generate"), "auto"),
(tr("Upload Script"), "upload_script") # 新增上传脚本选项
]
# 获取已有脚本文件
suffix = "*.json"
@ -132,7 +65,7 @@ def render_script_file(tr, params):
selected_script_index = st.selectbox(
tr("Script Files"),
index=selected_index, # 使用找到的索引
index=selected_index,
options=range(len(script_list)),
format_func=lambda x: script_list[x][0]
)
@ -141,10 +74,50 @@ def render_script_file(tr, params):
st.session_state['video_clip_json_path'] = script_path
params.video_clip_json_path = script_path
# 处理脚本上传
if script_path == "upload_script":
uploaded_file = st.file_uploader(
tr("Upload Script File"),
type=["json"],
accept_multiple_files=False,
)
if uploaded_file is not None:
try:
# 读取上传的JSON内容并验证格式
script_content = uploaded_file.read().decode('utf-8')
json_data = json.loads(script_content)
# 保存到脚本目录
script_file_path = os.path.join(script_dir, uploaded_file.name)
file_name, file_extension = os.path.splitext(uploaded_file.name)
# 如果文件已存在,添加时间戳
if os.path.exists(script_file_path):
timestamp = time.strftime("%Y%m%d%H%M%S")
file_name_with_timestamp = f"{file_name}_{timestamp}"
script_file_path = os.path.join(script_dir, file_name_with_timestamp + file_extension)
# 写入文件
with open(script_file_path, "w", encoding='utf-8') as f:
json.dump(json_data, f, ensure_ascii=False, indent=2)
# 更新状态
st.success(tr("Script Uploaded Successfully"))
st.session_state['video_clip_json_path'] = script_file_path
params.video_clip_json_path = script_file_path
time.sleep(1)
st.rerun()
except json.JSONDecodeError:
st.error(tr("Invalid JSON format"))
except Exception as e:
st.error(f"{tr('Upload failed')}: {str(e)}")
def render_video_file(tr, params):
"""渲染视频文件选择"""
video_list = [(tr("None"), ""), (tr("Upload Local Files"), "local")]
video_list = [(tr("None"), ""), (tr("Upload Local Files"), "upload_local")]
# 获取已有视频文件
for suffix in ["*.mp4", "*.mov", "*.avi", "*.mkv"]:
@ -164,7 +137,7 @@ def render_video_file(tr, params):
st.session_state['video_origin_path'] = video_path
params.video_origin_path = video_path
if video_path == "local":
if video_path == "upload_local":
uploaded_file = st.file_uploader(
tr("Upload Local Files"),
type=["mp4", "mov", "avi", "flv", "mkv"],
@ -250,7 +223,7 @@ def render_script_buttons(tr, params):
if st.button(button_name, key="script_action", disabled=not script_path):
if script_path == "auto":
generate_script(tr, params)
generate_script_docu(tr, params)
else:
load_script(tr, script_path)
@ -305,379 +278,6 @@ def load_script(tr, script_path):
st.error(f"{tr('Failed to load script')}: {str(e)}")
def generate_script(tr, params):
"""生成视频脚本"""
progress_bar = st.progress(0)
status_text = st.empty()
def update_progress(progress: float, message: str = ""):
progress_bar.progress(progress)
if message:
status_text.text(f"{progress}% - {message}")
else:
status_text.text(f"进度: {progress}%")
try:
with st.spinner("正在生成脚本..."):
if not params.video_origin_path:
st.error("请先选择视频文件")
return
# ===================提取键帧===================
update_progress(10, "正在提取关键帧...")
# 创建临时目录用于存储关键帧
keyframes_dir = os.path.join(utils.temp_dir(), "keyframes")
video_hash = utils.md5(params.video_origin_path + str(os.path.getmtime(params.video_origin_path)))
video_keyframes_dir = os.path.join(keyframes_dir, video_hash)
# 检查是否已经提取过关键帧
keyframe_files = []
if os.path.exists(video_keyframes_dir):
# 取已有的关键帧文件
for filename in sorted(os.listdir(video_keyframes_dir)):
if filename.endswith('.jpg'):
keyframe_files.append(os.path.join(video_keyframes_dir, filename))
if keyframe_files:
logger.info(f"使用已缓存的关键帧: {video_keyframes_dir}")
st.info(f"使用已缓存的关键帧,如需重新提取请删除目录: {video_keyframes_dir}")
update_progress(20, f"使用已缓存关键帧,共 {len(keyframe_files)}")
# 如果没有缓存的关键帧,则进行提取
if not keyframe_files:
try:
# 确保目录存在
os.makedirs(video_keyframes_dir, exist_ok=True)
# 初始化视频处理器
if config.frames.get("version") == "v2":
processor = video_processor_v2.VideoProcessor(params.video_origin_path)
# 处理视频并提取关键帧
processor.process_video_pipeline(
output_dir=video_keyframes_dir,
skip_seconds=st.session_state.get('skip_seconds'),
threshold=st.session_state.get('threshold')
)
else:
processor = video_processor.VideoProcessor(params.video_origin_path)
# 处理视频并提取关键帧
processor.process_video(
output_dir=video_keyframes_dir,
skip_seconds=0
)
# 获取所有关键帧文件路径
for filename in sorted(os.listdir(video_keyframes_dir)):
if filename.endswith('.jpg'):
keyframe_files.append(os.path.join(video_keyframes_dir, filename))
if not keyframe_files:
raise Exception("未提取到任何关键帧")
update_progress(20, f"关键帧提取完成,共 {len(keyframe_files)}")
except Exception as e:
# 如果提取失败,清理创建的目录
try:
if os.path.exists(video_keyframes_dir):
import shutil
shutil.rmtree(video_keyframes_dir)
except Exception as cleanup_err:
logger.error(f"清理失败的关键帧目录时出错: {cleanup_err}")
raise Exception(f"关键帧提取失败: {str(e)}")
# 根据不同的 LLM 提供商处理
vision_llm_provider = st.session_state.get('vision_llm_providers').lower()
logger.debug(f"Vision LLM 提供商: {vision_llm_provider}")
if vision_llm_provider == 'gemini':
try:
# ===================初始化视觉分析器===================
update_progress(30, "正在初始化视觉分析器...")
# 从配置中获取 Gemini 相关配置
vision_api_key = st.session_state.get('vision_gemini_api_key')
vision_model = st.session_state.get('vision_gemini_model_name')
vision_base_url = st.session_state.get('vision_gemini_base_url')
if not vision_api_key or not vision_model:
raise ValueError("未配置 Gemini API Key 或者 模型,请在基础设置中配置")
analyzer = vision_analyzer.VisionAnalyzer(
model_name=vision_model,
api_key=vision_api_key,
)
update_progress(40, "正在分析关键帧...")
# ===================创建异步事件循环===================
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
# 执行异步分析
vision_batch_size = st.session_state.get('vision_batch_size') or config.frames.get("vision_batch_size")
results = loop.run_until_complete(
analyzer.analyze_images(
images=keyframe_files,
prompt=config.app.get('vision_analysis_prompt'),
batch_size=vision_batch_size
)
)
loop.close()
# ===================处理分析结果===================
update_progress(60, "正在整理分析结果...")
# 合并所有批次的析结果
frame_analysis = ""
prev_batch_files = None
for result in results:
if 'error' in result:
logger.warning(f"批次 {result['batch_index']} 处理出现警告: {result['error']}")
continue
# 获取当前批次的文件列表
batch_files = get_batch_files(keyframe_files, result, vision_batch_size)
logger.debug(f"批次 {result['batch_index']} 处理完成,共 {len(batch_files)} 张图片")
logger.debug(batch_files)
first_timestamp, last_timestamp, _ = get_batch_timestamps(batch_files, prev_batch_files)
logger.debug(f"处理时间戳: {first_timestamp}-{last_timestamp}")
# 添加带时间戳的分析结果
frame_analysis += f"\n=== {first_timestamp}-{last_timestamp} ===\n"
frame_analysis += result['response']
frame_analysis += "\n"
# 更新上一个批次的文件
prev_batch_files = batch_files
if not frame_analysis.strip():
raise Exception("未能生成有效的帧分析结果")
# 保存分析结果
analysis_path = os.path.join(utils.temp_dir(), "frame_analysis.txt")
with open(analysis_path, 'w', encoding='utf-8') as f:
f.write(frame_analysis)
update_progress(70, "正在生成脚本...")
# 从配置中获取文本生成相关配置
text_provider = config.app.get('text_llm_provider', 'gemini').lower()
text_api_key = config.app.get(f'text_{text_provider}_api_key')
text_model = config.app.get(f'text_{text_provider}_model_name')
text_base_url = config.app.get(f'text_{text_provider}_base_url')
# 构建帧内容列表
frame_content_list = []
prev_batch_files = None
for i, result in enumerate(results):
if 'error' in result:
continue
batch_files = get_batch_files(keyframe_files, result, vision_batch_size)
_, _, timestamp_range = get_batch_timestamps(batch_files, prev_batch_files)
frame_content = {
"timestamp": timestamp_range,
"picture": result['response'],
"narration": "",
"OST": 2
}
frame_content_list.append(frame_content)
logger.debug(f"添加帧内容: 时间范围={timestamp_range}, 分析结果长度={len(result['response'])}")
# 更新上一个批次的文件
prev_batch_files = batch_files
if not frame_content_list:
raise Exception("没有有效的帧内容可以处理")
# ===================开始生成文案===================
update_progress(80, "正在生成文案...")
# 校验配置
api_params = {
"vision_api_key": vision_api_key,
"vision_model_name": vision_model,
"vision_base_url": vision_base_url or "",
"text_api_key": text_api_key,
"text_model_name": text_model,
"text_base_url": text_base_url or ""
}
headers = {
'accept': 'application/json',
'Content-Type': 'application/json'
}
session = requests.Session()
retry_strategy = Retry(
total=3,
backoff_factor=1,
status_forcelist=[500, 502, 503, 504]
)
adapter = HTTPAdapter(max_retries=retry_strategy)
session.mount("https://", adapter)
try:
response = session.post(
f"{config.app.get('narrato_api_url')}/video/config",
headers=headers,
json=api_params,
timeout=30,
verify=True
)
except Exception as e:
pass
custom_prompt = st.session_state.get('custom_prompt', '')
processor = ScriptProcessor(
model_name=text_model,
api_key=text_api_key,
prompt=custom_prompt,
base_url=text_base_url or "",
video_theme=st.session_state.get('video_theme', '')
)
# 处理帧内容生成脚本
script_result = processor.process_frames(frame_content_list)
# 将结果转换为JSON字符串
script = json.dumps(script_result, ensure_ascii=False, indent=2)
except Exception as e:
logger.exception(f"大模型处理过程中发生错误\n{traceback.format_exc()}")
raise Exception(f"分析失败: {str(e)}")
elif vision_llm_provider == 'narratoapi': # NarratoAPI
try:
# 创建临时目录
temp_dir = utils.temp_dir("narrato")
# 打包关键帧
update_progress(30, "正在打包关键帧...")
zip_path = os.path.join(temp_dir, f"keyframes_{int(time.time())}.zip")
if not file_utils.create_zip(keyframe_files, zip_path):
raise Exception("打包关键帧失败")
# 获取API配置
api_url = st.session_state.get('narrato_api_url')
api_key = st.session_state.get('narrato_api_key')
if not api_key:
raise ValueError("未配置 Narrato API Key请在基础设置中配置")
# 准备API请求
headers = {
'X-API-Key': api_key,
'accept': 'application/json'
}
api_params = {
'batch_size': st.session_state.get('narrato_batch_size', 10),
'use_ai': False,
'start_offset': 0,
'vision_model': st.session_state.get('narrato_vision_model', 'gemini-1.5-flash'),
'vision_api_key': st.session_state.get('narrato_vision_key'),
'llm_model': st.session_state.get('narrato_llm_model', 'qwen-plus'),
'llm_api_key': st.session_state.get('narrato_llm_key'),
'custom_prompt': st.session_state.get('custom_prompt', '')
}
# 发送API请求
logger.info(f"请求NarratoAPI: {api_url}")
update_progress(40, "正在上传文件...")
with open(zip_path, 'rb') as f:
files = {'file': (os.path.basename(zip_path), f, 'application/x-zip-compressed')}
try:
response = requests.post(
f"{api_url}/video/analyze",
headers=headers,
params=api_params,
files=files,
timeout=30 # 设置超时时间
)
response.raise_for_status()
except requests.RequestException as e:
logger.error(f"Narrato API 请求失败:\n{traceback.format_exc()}")
raise Exception(f"API请求失败: {str(e)}")
task_data = response.json()
task_id = task_data["data"].get('task_id')
if not task_id:
raise Exception(f"无效的API响应: {response.text}")
# 轮询任务状态
update_progress(50, "正在等待分析结果...")
retry_count = 0
max_retries = 60 # 最多等待2分钟
while retry_count < max_retries:
try:
status_response = requests.get(
f"{api_url}/video/tasks/{task_id}",
headers=headers,
timeout=10
)
status_response.raise_for_status()
task_status = status_response.json()['data']
if task_status['status'] == 'SUCCESS':
script = task_status['result']['data']
break
elif task_status['status'] in ['FAILURE', 'RETRY']:
raise Exception(f"任务失败: {task_status.get('error')}")
retry_count += 1
time.sleep(2)
except requests.RequestException as e:
logger.warning(f"获取任务状态失败,重试中: {str(e)}")
retry_count += 1
time.sleep(2)
continue
if retry_count >= max_retries:
raise Exception("任务执行超时")
except Exception as e:
logger.exception(f"NarratoAPI 处理过程中发生错误\n{traceback.format_exc()}")
raise Exception(f"NarratoAPI 处理失败: {str(e)}")
finally:
# 清理临时文件
try:
if os.path.exists(zip_path):
os.remove(zip_path)
except Exception as e:
logger.warning(f"清理临时文件失败: {str(e)}")
else:
logger.exception("Vision Model 未启用,请检查配置")
if script is None:
st.error("生成脚本失败,请检查日志")
st.stop()
logger.info(f"脚本生成完成")
if isinstance(script, list):
st.session_state['video_clip_json'] = script
elif isinstance(script, str):
st.session_state['video_clip_json'] = json.loads(script)
update_progress(80, "脚本生成完成")
time.sleep(0.1)
progress_bar.progress(100)
status_text.text("脚本生成完成!")
st.success("视频脚本生成成功!")
except Exception as err:
st.error(f"生成过程中发生错误: {str(err)}")
logger.exception(f"生成脚本时发生错误\n{traceback.format_exc()}")
finally:
time.sleep(2)
progress_bar.empty()
status_text.empty()
def save_script(tr, video_clip_json_details):
"""保存视频脚本"""
if not video_clip_json_details:
@ -724,7 +324,7 @@ def crop_video(tr, params):
utils.cut_video(params, update_progress)
time.sleep(0.5)
progress_bar.progress(100)
status_text.text("完成!")
status_text.text("完成!")
st.success("视频剪辑成功完成!")
except Exception as e:
st.error(f"剪辑过程中发生错误: {str(e)}")
@ -732,14 +332,3 @@ def crop_video(tr, params):
time.sleep(2)
progress_bar.empty()
status_text.empty()
def get_script_params():
"""获取脚本参数"""
return {
'video_language': st.session_state.get('video_language', ''),
'video_clip_json_path': st.session_state.get('video_clip_json_path', ''),
'video_origin_path': st.session_state.get('video_origin_path', ''),
'video_name': st.session_state.get('video_name', ''),
'video_plot': st.session_state.get('video_plot', '')
}

View File

@ -0,0 +1,45 @@
import streamlit as st
import os
import shutil
from loguru import logger
from app.utils.utils import storage_dir
def clear_directory(dir_path, tr):
    """Delete everything inside ``dir_path`` and report the outcome in the UI.

    The directory itself is kept. Each entry is removed independently, so one
    failure does not stop the sweep. Unlike a plain best-effort loop, the
    success message is shown only when every entry was removed; otherwise the
    entries that could not be deleted are listed in an error message.

    Args:
        dir_path: directory whose contents are removed.
        tr: callable that receives a message key and returns the text to display.
    """
    if not os.path.exists(dir_path):
        st.warning(tr("Directory does not exist"))
        return

    try:
        failed_items = []
        for item in os.listdir(dir_path):
            item_path = os.path.join(dir_path, item)
            try:
                # Symlinks are unlinked rather than followed: shutil.rmtree
                # refuses to operate on a symlink to a directory.
                if os.path.islink(item_path) or os.path.isfile(item_path):
                    os.unlink(item_path)
                elif os.path.isdir(item_path):
                    shutil.rmtree(item_path)
            except Exception as e:
                failed_items.append(item_path)
                logger.error(f"Failed to delete {item_path}: {e}")

        if failed_items:
            # Do not claim success when something was left behind.
            st.error(f"{tr('Failed to clear directory')}: {', '.join(failed_items)}")
        else:
            st.success(tr("Directory cleared"))
            logger.info(f"Cleared directory: {dir_path}")
    except Exception as e:
        st.error(f"{tr('Failed to clear directory')}: {str(e)}")
        logger.error(f"Failed to clear directory {dir_path}: {e}")
def render_system_panel(tr):
    """Render the system settings expander with its three clean-up buttons.

    Each button sits in its own column and, when pressed, empties one
    sub-directory of the storage directory through clear_directory().

    Args:
        tr: callable that receives a message key and returns the text to display.
    """
    # (button label key, path relative to the storage directory)
    cleanup_targets = (
        ("Clear frames", "temp/keyframes"),
        ("Clear clip videos", "temp/clip_video"),
        ("Clear tasks", "tasks"),
    )

    with st.expander(tr("System settings"), expanded=False):
        columns = st.columns(3)
        for column, (label_key, relative_path) in zip(columns, cleanup_targets):
            with column:
                pressed = st.button(tr(label_key), use_container_width=True)
                if pressed:
                    clear_directory(os.path.join(storage_dir(), relative_path), tr)

View File

@ -15,7 +15,7 @@
"Crop Video": "裁剪视频",
"Video File": "视频文件(:blue[1⃣支持上传视频文件(限制2G) 2⃣大文件建议直接导入 ./resource/videos 目录]",
"Plot Description": "剧情描述 (:blue[可从 https://www.tvmao.com/ 获取])",
"Generate Video Keywords": "点击使用AI根据**文案**生成【视频关键词】",
"Generate Video Keywords": "点击使用AI根据**文案**生成【视频关键词】",
"Please Enter the Video Subject": "请先填写视频文案",
"Generating Video Script and Keywords": "AI正在生成视频文案和关键词...",
"Generating Video Keywords": "AI正在生成视频关键词...",
@ -95,7 +95,7 @@
"Check Format": "脚本格式检查",
"Script Loaded Successfully": "脚本加载成功",
"Script format check passed": "脚本格式检查通过",
"Script format check failed": "脚本格式检查失败",
"Script format check failed": "脚本格式检查失败",
"Failed to Load Script": "加载脚本失败",
"Failed to Save Script": "保存脚本失败",
"Script saved successfully": "脚本保存成功",
@ -103,7 +103,6 @@
"Video Quality": "视频质量",
"Custom prompt for LLM, leave empty to use default prompt": "自定义提示词,留空则使用默认提示词",
"Proxy Settings": "代理设置",
"Language": "界面语言",
"HTTP_PROXY": "HTTP 代理",
"HTTPs_PROXY": "HTTPS 代理",
"Vision Model Settings": "视频分析模型设置",
@ -134,6 +133,61 @@
"Unsupported provider": "不支持的提供商",
"0: Keep the audio only, 1: Keep the original sound only, 2: Keep the original sound and audio": "0: 仅保留音频1: 仅保留原声2: 保留原声和音频",
"Text model is not available": "文案生成模型不可用",
"Text model is available": "文案生成模型可用"
"Text model is available": "文案生成模型可用",
"Upload Script": "上传脚本",
"Upload Script File": "上传脚本文件",
"Script Uploaded Successfully": "脚本上传成功",
"Invalid JSON format": "无效的JSON格式",
"Upload failed": "上传失败",
"Video Subtitle Merge": "**合并视频与字幕**",
"Upload Video and Subtitle Files": "上传视频和字幕文件",
"Matched File Pairs": "已匹配的文件对",
"Merge All Files": "合并所有文件",
"Merge Function Not Implemented": "合并功能待实现",
"No Matched Pairs Found": "未找到匹配的文件对",
"Missing Subtitle": "缺少对应的字幕文件",
"Missing Video": "缺少对应的视频文件",
"All Uploaded Files": "所有上传的文件",
"Order": "排序序号",
"Reorder": "重新排序",
"Merging files...": "正在合并文件...",
"Merge completed!": "合并完成!",
"Download Merged Video": "下载合并后的视频",
"Download Merged Subtitle": "下载合并后的字幕",
"Error during merge": "合并过程中出错",
"Failed to generate merged video.": "生成合并视频失败。",
"Failed to generate merged subtitle.": "生成合并字幕失败。",
"Error reading merged video file": "读取合并后的视频文件时出错",
"Error reading merged subtitle file": "读取合并后的字幕文件时出错",
"Error processing video files. Please check if the videos are valid MP4 files.": "处理视频文件时出错。请检查视频是否为有效的MP4文件。",
"Error processing subtitle files. Please check if the subtitles are valid SRT files.": "处理字幕文件时出错。请检查字幕是否为有效的SRT文件。",
"Preview Merged Video": "预览合并后的视频",
"Video Path": "视频路径",
"Subtitle Path": "字幕路径",
"Enable Proxy": "启用代理",
"QwenVL model is available": "QwenVL 模型可用",
"QwenVL model is not available": "QwenVL 模型不可用",
"System settings": "系统设置",
"Clear Cache": "清理缓存",
"Cache cleared": "缓存清理完成",
"storage directory does not exist": "storage目录不存在",
"Failed to clear cache": "清理缓存失败",
"Clear frames": "清理关键帧",
"Clear clip videos": "清理裁剪视频",
"Clear tasks": "清理任务",
"Directory cleared": "目录清理完成",
"Directory does not exist": "目录不存在",
"Failed to clear directory": "清理目录失败",
"Subtitle Preview": "字幕预览",
"One-Click Transcribe": "一键转录",
"Transcribing...": "正在转录中...",
"Transcription Complete!": "转录完成!",
"Transcription Failed. Please try again.": "转录失败,请重试。",
"API rate limit exceeded. Please wait about an hour and try again.": "API 调用次数已达到限制,请等待约一小时后再试。",
"Resources exhausted. Please try again later.": "资源已耗尽,请稍后再试。",
"Transcription Failed": "转录失败",
"Mergeable Files": "可合并文件数",
"Subtitle Content": "字幕内容",
"Merge Result Preview": "合并结果预览"
}
}

141
webui/tools/base.py Normal file
View File

@ -0,0 +1,141 @@
import os
import streamlit as st
from loguru import logger
from app.utils import gemini_analyzer, qwenvl_analyzer
def create_vision_analyzer(provider, api_key, model, base_url):
    """Build the vision analyzer that matches ``provider``.

    Args:
        provider: provider name, either 'gemini' or 'qwenvl'.
        api_key: API key handed to the analyzer.
        model: model name handed to the analyzer.
        base_url: API base URL; only forwarded for 'qwenvl'.

    Returns:
        A ``gemini_analyzer.VisionAnalyzer`` for 'gemini', or a
        ``qwenvl_analyzer.QwenAnalyzer`` for 'qwenvl'.

    Raises:
        ValueError: for any other provider name.
    """
    if provider == 'gemini':
        return gemini_analyzer.VisionAnalyzer(model_name=model, api_key=api_key)

    if provider == 'qwenvl':
        # Only the required arguments are passed on.
        qwen_kwargs = {
            'model_name': model,
            'api_key': api_key,
            'base_url': base_url,
        }
        return qwenvl_analyzer.QwenAnalyzer(**qwen_kwargs)

    raise ValueError(f"不支持的视觉分析提供商: {provider}")
def get_script_params():
    """Collect the script-related values from the Streamlit session state.

    Returns:
        dict with the keys 'video_language', 'video_clip_json_path',
        'video_origin_path', 'video_name' and 'video_plot'; a key missing
        from the session state maps to an empty string.
    """
    param_keys = (
        'video_language',
        'video_clip_json_path',
        'video_origin_path',
        'video_name',
        'video_plot',
    )
    return {key: st.session_state.get(key, '') for key in param_keys}
def get_batch_timestamps(batch_files, prev_batch_files=None):
    """Derive the time span covered by one batch of key-frame files.

    The time is read from the third '_'-separated field of the file name,
    e.g. ``keyframe_001253_000050100.jpg`` carries ``000050100``, which is
    read as HHMMSSmmm and rendered as ``00:00:50,100``. Fields shorter than
    nine characters are right-padded with zeros first.

    Args:
        batch_files: file paths of the current batch.
        prev_batch_files: file paths of the previous batch. When the current
            batch holds a single file, the last file of the previous batch
            supplies the start time.

    Returns:
        tuple ``(first_timestamp, last_timestamp, timestamp_range)`` where
        each timestamp has the form ``HH:MM:SS,mmm`` and the range is
        ``first-last``. Unparsable names yield ``00:00:00,000``.
    """
    zero_stamp = "00:00:00,000"

    if not batch_files:
        logger.warning("Empty batch files")
        return zero_stamp, zero_stamp, "00:00:00,000-00:00:00,000"

    def pick_boundary_names():
        """Return the base names of the frames that open and close the span."""
        borrow_from_previous = len(batch_files) == 1 and bool(prev_batch_files)
        if not borrow_from_previous:
            return os.path.basename(batch_files[0]), os.path.basename(batch_files[-1])
        # Single-frame batch: start from the last frame of the previous batch.
        opening = os.path.basename(prev_batch_files[-1])
        closing = os.path.basename(batch_files[0])
        logger.debug(f"单张图片批次,使用上一批次最后一帧作为首帧: {opening}")
        return opening, closing

    def raw_digits(filename):
        """Pull the time field out of a frame file name."""
        try:
            digits = filename.split('_')[2].replace('.jpg', '')
        except (IndexError, AttributeError) as e:
            logger.warning(f"Invalid filename format: {filename}, error: {e}")
            return "000000000"
        # Shorter (older-style) fields are padded on the right to nine characters.
        return digits.ljust(9, '0')

    def render_stamp(time_str):
        """Turn an HHMMSSmmm string into ``HH:MM:SS,mmm``."""
        if len(time_str) < 9:
            logger.warning(f"Invalid timestamp format: {time_str}")
            return "00:00:00,000"
        field_bounds = ((0, 2), (2, 4), (4, 6), (6, None))
        try:
            hours, minutes, seconds, milliseconds = (
                int(time_str[start:stop]) for start, stop in field_bounds
            )
        except ValueError as e:
            logger.warning(f"时间戳格式转换失败: {time_str}, error: {e}")
            return "00:00:00,000"
        return f"{hours:02d}:{minutes:02d}:{seconds:02d},{milliseconds:03d}"

    first_frame, last_frame = pick_boundary_names()
    first_time = raw_digits(first_frame)
    last_time = raw_digits(last_frame)
    first_timestamp = render_stamp(first_time)
    last_timestamp = render_stamp(last_time)
    return first_timestamp, last_timestamp, f"{first_timestamp}-{last_timestamp}"
def get_batch_files(keyframe_files, result, batch_size=5):
    """Return the slice of key-frame files that belongs to one analysis result.

    Args:
        keyframe_files: all key-frame file paths, in batch order.
        result: analysis result; its 'batch_index' selects the batch.
        batch_size: number of files per batch.

    Returns:
        The files of that batch; the final batch may be shorter.
    """
    first = batch_size * result['batch_index']
    # Never reach past the end of the file list.
    stop = min(len(keyframe_files), first + batch_size)
    return keyframe_files[first:stop]

View File

@ -0,0 +1,293 @@
# 纪录片脚本生成
import os
import json
import time
import asyncio
import traceback
import requests
import streamlit as st
from loguru import logger
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from app.config import config
from app.utils.script_generator import ScriptProcessor
from app.utils import utils, video_processor, video_processor_v2, qwenvl_analyzer
from webui.tools.base import create_vision_analyzer, get_batch_files, get_batch_timestamps
def generate_script_docu(tr, params):
"""
生成 纪录片 视频脚本
"""
progress_bar = st.progress(0)
status_text = st.empty()
def update_progress(progress: float, message: str = ""):
progress_bar.progress(progress)
if message:
status_text.text(f"{progress}% - {message}")
else:
status_text.text(f"进度: {progress}%")
try:
with st.spinner("正在生成脚本..."):
if not params.video_origin_path:
st.error("请先选择视频文件")
return
# ===================提取键帧===================
update_progress(10, "正在提取关键帧...")
# 创建临时目录用于存储关键帧
keyframes_dir = os.path.join(utils.temp_dir(), "keyframes")
video_hash = utils.md5(params.video_origin_path + str(os.path.getmtime(params.video_origin_path)))
video_keyframes_dir = os.path.join(keyframes_dir, video_hash)
# 检查是否已经提取过关键帧
keyframe_files = []
if os.path.exists(video_keyframes_dir):
# 取已有的关键帧文件
for filename in sorted(os.listdir(video_keyframes_dir)):
if filename.endswith('.jpg'):
keyframe_files.append(os.path.join(video_keyframes_dir, filename))
if keyframe_files:
logger.info(f"使用已缓存的关键帧: {video_keyframes_dir}")
st.info(f"使用已缓存的关键帧,如需重新提取请删除目录: {video_keyframes_dir}")
update_progress(20, f"使用已缓存关键帧,共 {len(keyframe_files)}")
# 如果没有缓存的关键帧,则进行提取
if not keyframe_files:
try:
# 确保目录存在
os.makedirs(video_keyframes_dir, exist_ok=True)
# 初始化视频处理器
if config.frames.get("version") == "v2":
processor = video_processor_v2.VideoProcessor(params.video_origin_path)
# 处理视频并提取关键帧
processor.process_video_pipeline(
output_dir=video_keyframes_dir,
skip_seconds=st.session_state.get('skip_seconds'),
threshold=st.session_state.get('threshold')
)
else:
processor = video_processor.VideoProcessor(params.video_origin_path)
# 处理视频并提取关键帧
processor.process_video(
output_dir=video_keyframes_dir,
skip_seconds=0
)
# 获取所有关键文件路径
for filename in sorted(os.listdir(video_keyframes_dir)):
if filename.endswith('.jpg'):
keyframe_files.append(os.path.join(video_keyframes_dir, filename))
if not keyframe_files:
raise Exception("未提取到任何关键帧")
update_progress(20, f"关键帧提取完成,共 {len(keyframe_files)}")
except Exception as e:
# 如果提取失败,清理创建的目录
try:
if os.path.exists(video_keyframes_dir):
import shutil
shutil.rmtree(video_keyframes_dir)
except Exception as cleanup_err:
logger.error(f"清理失败的关键帧目录时出错: {cleanup_err}")
raise Exception(f"关键帧提取失败: {str(e)}")
# 根据不同的 LLM 提供商处理
vision_llm_provider = st.session_state.get('vision_llm_providers').lower()
logger.debug(f"Vision LLM 提供商: {vision_llm_provider}")
try:
# ===================初始化视觉分析器===================
update_progress(30, "正在初始化视觉分析器...")
# 从配置中获取相关配置
if vision_llm_provider == 'gemini':
vision_api_key = st.session_state.get('vision_gemini_api_key')
vision_model = st.session_state.get('vision_gemini_model_name')
vision_base_url = st.session_state.get('vision_gemini_base_url')
elif vision_llm_provider == 'qwenvl':
vision_api_key = st.session_state.get('vision_qwenvl_api_key')
vision_model = st.session_state.get('vision_qwenvl_model_name', 'qwen-vl-max-latest')
vision_base_url = st.session_state.get('vision_qwenvl_base_url',
'https://dashscope.aliyuncs.com/compatible-mode/v1')
else:
raise ValueError(f"不支持的视觉分析提供商: {vision_llm_provider}")
# 创建视觉分析器实例
analyzer = create_vision_analyzer(
provider=vision_llm_provider,
api_key=vision_api_key,
model=vision_model,
base_url=vision_base_url
)
update_progress(40, "正在分析关键帧...")
# ===================创建异步事件循环===================
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
# 执行异步分析
vision_batch_size = st.session_state.get('vision_batch_size') or config.frames.get("vision_batch_size")
results = loop.run_until_complete(
analyzer.analyze_images(
images=keyframe_files,
prompt=config.app.get('vision_analysis_prompt'),
batch_size=vision_batch_size
)
)
loop.close()
# ===================处理分析结果===================
update_progress(60, "正在整理分析结果...")
# 合并所有批次的析结果
frame_analysis = ""
prev_batch_files = None
for result in results:
if 'error' in result:
logger.warning(f"批次 {result['batch_index']} 处理出现警告: {result['error']}")
# 获取当前批次的文件列表 keyframe_001136_000045.jpg 将 000045 精度提升到 毫秒
batch_files = get_batch_files(keyframe_files, result, vision_batch_size)
logger.debug(f"批次 {result['batch_index']} 处理完成,共 {len(batch_files)} 张图片")
# logger.debug(batch_files)
first_timestamp, last_timestamp, _ = get_batch_timestamps(batch_files, prev_batch_files)
logger.debug(f"处理时间戳: {first_timestamp}-{last_timestamp}")
# 添加带时间戳的分析结果
frame_analysis += f"\n=== {first_timestamp}-{last_timestamp} ===\n"
frame_analysis += result['response']
frame_analysis += "\n"
# 更新上一个批次的文件
prev_batch_files = batch_files
if not frame_analysis.strip():
raise Exception("未能生成有效的帧分析结果")
# 保存分析结果
analysis_path = os.path.join(utils.temp_dir(), "frame_analysis.txt")
with open(analysis_path, 'w', encoding='utf-8') as f:
f.write(frame_analysis)
update_progress(70, "正在生成脚本...")
# 从配置中获取文本生成相关配置
text_provider = config.app.get('text_llm_provider', 'gemini').lower()
text_api_key = config.app.get(f'text_{text_provider}_api_key')
text_model = config.app.get(f'text_{text_provider}_model_name')
text_base_url = config.app.get(f'text_{text_provider}_base_url')
# 构建帧内容列表
frame_content_list = []
prev_batch_files = None
for i, result in enumerate(results):
if 'error' in result:
continue
batch_files = get_batch_files(keyframe_files, result, vision_batch_size)
_, _, timestamp_range = get_batch_timestamps(batch_files, prev_batch_files)
frame_content = {
"timestamp": timestamp_range,
"picture": result['response'],
"narration": "",
"OST": 2
}
frame_content_list.append(frame_content)
logger.debug(f"添加帧内容: 时间范围={timestamp_range}, 分析结果长度={len(result['response'])}")
# 更新上一个批次的文件
prev_batch_files = batch_files
if not frame_content_list:
raise Exception("没有有效的帧内容可以处理")
# ===================开始生成文案===================
update_progress(80, "正在生成文案...")
# 校验配置
api_params = {
"vision_api_key": vision_api_key,
"vision_model_name": vision_model,
"vision_base_url": vision_base_url or "",
"text_api_key": text_api_key,
"text_model_name": text_model,
"text_base_url": text_base_url or ""
}
headers = {
'accept': 'application/json',
'Content-Type': 'application/json'
}
session = requests.Session()
retry_strategy = Retry(
total=3,
backoff_factor=1,
status_forcelist=[500, 502, 503, 504]
)
adapter = HTTPAdapter(max_retries=retry_strategy)
session.mount("https://", adapter)
try:
response = session.post(
f"{config.app.get('narrato_api_url')}/video/config",
headers=headers,
json=api_params,
timeout=30,
verify=True
)
except Exception as e:
pass
custom_prompt = st.session_state.get('custom_prompt', '')
processor = ScriptProcessor(
model_name=text_model,
api_key=text_api_key,
prompt=custom_prompt,
base_url=text_base_url or "",
video_theme=st.session_state.get('video_theme', '')
)
# 处理帧内容生成脚本
script_result = processor.process_frames(frame_content_list)
# 结果转换为JSON字符串
script = json.dumps(script_result, ensure_ascii=False, indent=2)
except Exception as e:
logger.exception(f"大模型处理过程中发生错误\n{traceback.format_exc()}")
raise Exception(f"分析失败: {str(e)}")
if script is None:
st.error("生成脚本失败,请检查日志")
st.stop()
logger.info(f"脚本生成完成")
if isinstance(script, list):
st.session_state['video_clip_json'] = script
elif isinstance(script, str):
st.session_state['video_clip_json'] = json.loads(script)
update_progress(80, "脚本生成完成")
time.sleep(0.1)
progress_bar.progress(100)
status_text.text("脚本生成完成!")
st.success("视频脚本生成成功!")
except Exception as err:
st.error(f"生成过程中发生错误: {str(err)}")
logger.exception(f"生成脚本时发生错误\n{traceback.format_exc()}")
finally:
time.sleep(2)
progress_bar.empty()
status_text.empty()

115
webui/utils/merge_video.py Normal file
View File

@ -0,0 +1,115 @@
"""
合并视频和字幕文件
"""
from moviepy.editor import VideoFileClip, concatenate_videoclips
import pysrt
import os
def get_video_duration(video_path):
    """Return the duration of a video file in seconds.

    Args:
        video_path: Path of the video file to open with ``VideoFileClip``.

    Returns:
        The ``duration`` attribute of the opened clip.
    """
    video = VideoFileClip(video_path)
    try:
        return video.duration
    finally:
        # Release the clip even if reading the duration fails.
        video.close()
def adjust_subtitle_timing(subtitle_path, time_offset):
    """Load a subtitle file and shift every cue by a fixed offset.

    Args:
        subtitle_path: Path of the SRT file to open with ``pysrt``.
        time_offset: Offset in seconds (may be fractional) to add to the
            start and end time of every subtitle item.

    Returns:
        The loaded ``pysrt`` subtitle collection with shifted timings. This
        function does not save anything back to ``subtitle_path``.
    """
    subs = pysrt.open(subtitle_path)

    # Convert the offset to whole milliseconds once. Rounding here, instead of
    # truncating hours/minutes/seconds/milliseconds separately, avoids losing
    # a millisecond to float error (2.3 * 1000 == 2299.9999999999995) and
    # treats negative offsets consistently.
    offset_ms = int(round(time_offset * 1000))

    # SubRipFile.shift applies the same offset to start and end of each item.
    subs.shift(milliseconds=offset_ms)
    return subs
def merge_videos_and_subtitles(video_paths, subtitle_paths, output_video_path, output_subtitle_path):
    """Concatenate videos and merge their subtitle files onto one timeline.

    The clips are opened in the given order. Subtitles of the first clip are
    used as-is; subtitles of every later clip are shifted by the summed
    duration of the clips that precede it. Progress is reported with
    ``print``.

    Args:
        video_paths: Video file paths, in playback order.
        subtitle_paths: SRT file paths, one per video, in the same order.
        output_video_path: Destination of the merged video. If this path
            already exists the video is NOT re-rendered.
        output_subtitle_path: Destination of the merged SRT file; it is
            always written (UTF-8).

    Raises:
        ValueError: If ``video_paths`` and ``subtitle_paths`` differ in length.
    """
    if len(video_paths) != len(subtitle_paths):
        raise ValueError("视频文件数量与字幕文件数量不匹配")

    video_clips = []
    final_video = None
    accumulated_duration = 0
    merged_subs = pysrt.SubRipFile()

    try:
        for i, (video_path, subtitle_path) in enumerate(zip(video_paths, subtitle_paths)):
            print(f"处理视频 {i + 1}/{len(video_paths)}: {video_path}")
            video_clip = VideoFileClip(video_path)
            video_clips.append(video_clip)

            print(f"处理字幕 {i + 1}/{len(subtitle_paths)}: {subtitle_path}")
            if i == 0:
                # The first file already starts at the beginning of the timeline.
                current_subs = pysrt.open(subtitle_path)
            else:
                # Later files are moved behind everything merged so far.
                current_subs = adjust_subtitle_timing(subtitle_path, accumulated_duration)

            merged_subs.extend(current_subs)
            accumulated_duration += video_clip.duration

        # Skip the expensive render when the merged video is already on disk.
        if not os.path.exists(output_video_path):
            print("合并视频中...")
            final_video = concatenate_videoclips(video_clips)

            print("保存合并后的视频...")
            final_video.write_videofile(output_video_path, audio_codec='aac')

        # Cue indexes are carried over from the individual source files and
        # would otherwise repeat in the merged file; renumber them into one
        # continuous sequence before saving.
        merged_subs.clean_indexes()

        print("保存合并后的字幕...")
        merged_subs.save(output_subtitle_path, encoding='utf-8')
        print("合并完成")
    finally:
        # Release the concatenated clip first, then every source clip.
        if final_video is not None:
            final_video.close()
        for clip in video_clips:
            clip.close()
def main():
    """Example usage: merge five clips under ``temp/`` with their subtitles."""
    # Each entry pairs a video with its subtitle file, in playback order.
    sources = [
        ("temp/1.mp4", "temp/1.srt"),
        ("temp/2.mp4", "temp/2.srt"),
        ("temp/3.mp4", "temp/3.srt"),
        ("temp/4.mp4", "temp/4.srt"),
        ("temp/5.mp4", "temp/5.srt"),
    ]
    video_paths = [video for video, _ in sources]
    subtitle_paths = [subtitle for _, subtitle in sources]

    merge_videos_and_subtitles(
        video_paths,
        subtitle_paths,
        "temp/merged_video.mp4",
        "temp/merged_subtitle.srt",
    )


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,100 @@
import logging
from typing import List, Dict, Any, Optional
from app.utils import gemini_analyzer, qwenvl_analyzer
logger = logging.getLogger(__name__)
class VisionAnalyzer:
    """Provider-agnostic wrapper around a concrete vision analyzer backend.

    A new instance is unconfigured. Call ``initialize_gemini`` or
    ``initialize_qwenvl`` to create the backend before ``analyze_images``.
    """

    def __init__(self):
        # Everything stays None until one of the initialize_* methods runs.
        self.provider = None
        self.api_key = None
        self.model = None
        self.base_url = None
        self.analyzer = None

    def _store_settings(self, provider: str, api_key: str, model: str,
                        base_url: Optional[str]) -> None:
        """Record the provider settings shared by every initialize_* method."""
        self.provider = provider
        self.api_key = api_key
        self.model = model
        self.base_url = base_url

    def initialize_gemini(self, api_key: str, model: str, base_url: Optional[str]) -> None:
        """
        Configure this wrapper to use the Gemini vision analyzer.

        Args:
            api_key: Gemini API key.
            model: Model name.
            base_url: API base URL. It is stored on the wrapper only; it is
                not passed to the backend constructor.
        """
        self._store_settings('gemini', api_key, model, base_url)
        self.analyzer = gemini_analyzer.VisionAnalyzer(
            model_name=model,
            api_key=api_key
        )

    def initialize_qwenvl(self, api_key: str, model: str, base_url: Optional[str]) -> None:
        """
        Configure this wrapper to use the QwenVL vision analyzer.

        Args:
            api_key: Alibaba Cloud API key.
            model: Model name.
            base_url: API base URL. It is stored on the wrapper only; it is
                not passed to the backend constructor.
        """
        self._store_settings('qwenvl', api_key, model, base_url)
        self.analyzer = qwenvl_analyzer.QwenAnalyzer(
            model_name=model,
            api_key=api_key
        )

    async def analyze_images(self, images: List[str], prompt: str,
                             batch_size: int = 5) -> List[Dict[str, Any]]:
        """
        Analyze images by delegating to the configured backend.

        Args:
            images: List of image file paths.
            prompt: Prompt sent along with the images.
            batch_size: Number of images per batch, 5 by default.

        Returns:
            Whatever the backend's ``analyze_images`` returns, unchanged.
            NOTE(review): annotated as a list of per-batch dicts because
            callers iterate the result that way; confirm against the backends.

        Raises:
            ValueError: If no backend has been initialized yet.
        """
        if not self.analyzer:
            raise ValueError("未初始化视觉分析器")

        return await self.analyzer.analyze_images(
            images=images,
            prompt=prompt,
            batch_size=batch_size
        )
def create_vision_analyzer(provider: str, **kwargs) -> VisionAnalyzer:
    """
    Build a ``VisionAnalyzer`` configured for the requested provider.

    Args:
        provider: Provider name, matched case-insensitively
            ('gemini' or 'qwenvl').
        **kwargs: Provider settings; ``api_key``, ``model`` and ``base_url``
            are read (missing keys are passed on as ``None``).

    Returns:
        VisionAnalyzer: The initialized analyzer wrapper.

    Raises:
        ValueError: If ``provider`` is not one of the supported names.
    """
    analyzer = VisionAnalyzer()
    provider_key = provider.lower()

    # Pick the matching initializer first, then call it once below.
    if provider_key == 'gemini':
        initialize = analyzer.initialize_gemini
    elif provider_key == 'qwenvl':
        initialize = analyzer.initialize_qwenvl
    else:
        raise ValueError(f"不支持的视觉分析提供商: {provider}")

    initialize(
        api_key=kwargs.get('api_key'),
        model=kwargs.get('model'),
        base_url=kwargs.get('base_url'),
    )
    return analyzer