mirror of
https://github.com/linyqh/NarratoAI.git
synced 2025-12-30 18:00:16 +00:00
commit
894ba13026
12
.gitignore
vendored
12
.gitignore
vendored
@ -23,8 +23,12 @@ node_modules
|
||||
# 模型目录
|
||||
/models/
|
||||
./models/*
|
||||
resource/scripts/*
|
||||
resource/videos/*
|
||||
resource/songs/*
|
||||
resource/fonts/*
|
||||
resource/scripts/*.json
|
||||
resource/videos/*.mp4
|
||||
resource/songs/*.mp3
|
||||
resource/songs/*.flac
|
||||
resource/fonts/*.ttc
|
||||
resource/fonts/*.ttf
|
||||
resource/fonts/*.otf
|
||||
resource/srt/*.srt
|
||||
app/models/faster-whisper-large-v2/*
|
||||
@ -43,6 +43,9 @@ NarratoAI 是一个自动化影视解说工具,基于LLM实现文案撰写、
|
||||
- [x] 发布 0.3.5 整合包
|
||||
- [ ] 支持阿里 Qwen2-VL 大模型理解视频
|
||||
- [ ] 支持短剧解说
|
||||
- [x] 合并素材
|
||||
- [ ] 一键转录
|
||||
- [ ] 一键清理缓存
|
||||
- [ ] ...
|
||||
|
||||
## 配置要求 📦
|
||||
|
||||
@ -163,109 +163,109 @@ def delete_video(request: Request, task_id: str = Path(..., description="Task ID
|
||||
)
|
||||
|
||||
|
||||
# Lists the MP3 files found directly inside the songs directory. The payload is
# {"files": [...]}, one entry per file with its base name, its size in bytes
# and the path exactly as glob returned it.
@router.get(
    "/musics", response_model=BgmRetrieveResponse, summary="Retrieve local BGM files"
)
def get_bgm_list(request: Request):
    pattern = os.path.join(utils.song_dir(), "*.mp3")
    bgm_list = [
        {
            "name": os.path.basename(path),
            "size": os.path.getsize(path),
            "file": path,
        }
        for path in glob.glob(pattern)
    ]
    return utils.get_response(200, {"files": bgm_list})
|
||||
# @router.get(
|
||||
# "/musics", response_model=BgmRetrieveResponse, summary="Retrieve local BGM files"
|
||||
# )
|
||||
# def get_bgm_list(request: Request):
|
||||
# suffix = "*.mp3"
|
||||
# song_dir = utils.song_dir()
|
||||
# files = glob.glob(os.path.join(song_dir, suffix))
|
||||
# bgm_list = []
|
||||
# for file in files:
|
||||
# bgm_list.append(
|
||||
# {
|
||||
# "name": os.path.basename(file),
|
||||
# "size": os.path.getsize(file),
|
||||
# "file": file,
|
||||
# }
|
||||
# )
|
||||
# response = {"files": bgm_list}
|
||||
# return utils.get_response(200, response)
|
||||
#
|
||||
|
||||
|
||||
@router.post(
    "/musics",
    response_model=BgmUploadResponse,
    summary="Upload the BGM file to the songs directory",
)
def upload_bgm_file(request: Request, file: UploadFile = File(...)):
    """Save an uploaded ``.mp3`` file into the songs directory.

    Only the last path component of the client-supplied filename is used, so a
    name such as ``../../x.mp3`` cannot be written outside the songs directory.
    An existing file with the same name is overwritten.

    :param request: incoming request, used to obtain the id quoted in errors
    :param file: the uploaded file
    :return: the ``utils.get_response`` envelope carrying ``{"file": save_path}``
    :raises HttpException: status 400 when the name is missing or does not end
        in ``.mp3``
    """
    import shutil

    request_id = base.get_task_id(request)

    # The filename is untrusted input: treat backslashes as separators too and
    # drop every directory component. A missing filename becomes "".
    filename = os.path.basename((file.filename or "").replace("\\", "/"))

    # check file ext (with the dot, so "foomp3" is no longer accepted)
    if filename.endswith(".mp3"):
        save_path = os.path.join(utils.song_dir(), filename)
        # save file; if the file already exists, it will be overwritten
        with open(save_path, "wb") as buffer:
            file.file.seek(0)
            # Copy in chunks instead of loading the whole upload into memory.
            shutil.copyfileobj(file.file, buffer)
        return utils.get_response(200, {"file": save_path})

    raise HttpException(
        "", status_code=400, message=f"{request_id}: Only *.mp3 files can be uploaded"
    )
|
||||
|
||||
|
||||
def _parse_byte_range(range_header, size):
    """Resolve a single-range ``Range`` header against a resource of *size* bytes.

    :return: ``(start, end)`` as inclusive byte offsets, or ``None`` when the
        header is not a well-formed single ``bytes`` range and should be ignored
    :raises ValueError: when the range is well-formed but cannot be satisfied
    """
    import re

    match = re.fullmatch(
        r"\s*bytes\s*=\s*(\d*)-(\d*)\s*", range_header, re.ASCII | re.IGNORECASE
    )
    if match is None:
        return None
    first, last = match.groups()
    if not first and not last:
        return None

    if first:
        start = int(first)
        # An end past the last byte is clamped to the last byte.
        end = min(int(last), size - 1) if last else size - 1
    else:
        # Suffix form "bytes=-N": the final N bytes of the resource.
        suffix_length = int(last)
        if suffix_length == 0:
            raise ValueError("empty suffix range")
        start = max(size - suffix_length, 0)
        end = size - 1

    if start > end:
        raise ValueError("range not satisfiable")
    return start, end


@router.get("/stream/{file_path:path}")
async def stream_video(request: Request, file_path: str):
    """Stream a video from the tasks directory, honouring a ``Range`` header.

    Without a usable ``Range`` header the whole file is sent with status 200;
    with one, the requested slice is sent with status 206 and ``Content-Range``.

    :param request: incoming request; only its ``Range`` header is read
    :param file_path: path of the video relative to the tasks directory
    :return: a ``StreamingResponse`` with media type ``video/mp4``
    :raises HttpException: status 404 when the path leaves the tasks directory
        or is not a file; status 416 when the range cannot be satisfied
    """
    tasks_root = os.path.realpath(utils.task_dir())
    # file_path comes from the URL. Leading separators would make os.path.join
    # discard the tasks directory, and ".." segments could climb out of it.
    video_path = os.path.realpath(
        os.path.join(tasks_root, file_path.lstrip("/\\"))
    )
    inside_tasks = video_path.startswith(os.path.join(tasks_root, ""))
    if not inside_tasks or not os.path.isfile(video_path):
        raise HttpException(
            "", status_code=404, message=f"video not found: {file_path}"
        )

    video_size = os.path.getsize(video_path)
    start, end = 0, video_size - 1
    is_partial = False

    range_header = request.headers.get("Range")
    if range_header:
        try:
            byte_range = _parse_byte_range(range_header, video_size)
        except ValueError:
            raise HttpException(
                "", status_code=416, message=f"range not satisfiable: {range_header}"
            ) from None
        if byte_range is not None:
            start, end = byte_range
            is_partial = True

    length = end - start + 1

    def file_iterator(path, offset, bytes_to_read):
        # Yield exactly bytes_to_read bytes starting at offset, in small chunks.
        with open(path, "rb") as f:
            f.seek(offset, os.SEEK_SET)
            remaining = bytes_to_read
            while remaining > 0:
                data = f.read(min(4096, remaining))
                if not data:
                    break
                remaining -= len(data)
                yield data

    response = StreamingResponse(
        file_iterator(video_path, start, length), media_type="video/mp4"
    )
    response.headers["Accept-Ranges"] = "bytes"
    response.headers["Content-Length"] = str(length)
    if is_partial:
        response.headers["Content-Range"] = f"bytes {start}-{end}/{video_size}"
        response.status_code = 206  # Partial Content

    return response
|
||||
|
||||
|
||||
@router.get("/download/{file_path:path}")
async def download_video(_: Request, file_path: str):
    """
    Send a video from the tasks directory as a file download.

    :param _: Request request (unused)
    :param file_path: video file path relative to the tasks directory,
        eg: /cd1727ed-3473-42a2-a7da-4faafafec72b/final-1.mp4
    :return: video file
    :raises HttpException: status 404 when the path leaves the tasks directory
        or is not a file
    """
    tasks_root = os.path.realpath(utils.task_dir())
    # file_path comes from the URL. Leading separators would make os.path.join
    # discard the tasks directory, and ".." segments could climb out of it.
    video_path = os.path.realpath(
        os.path.join(tasks_root, file_path.lstrip("/\\"))
    )
    inside_tasks = video_path.startswith(os.path.join(tasks_root, ""))
    if not inside_tasks or not os.path.isfile(video_path):
        raise HttpException(
            "", status_code=404, message=f"video not found: {file_path}"
        )

    target = pathlib.Path(video_path)
    # Passing filename lets FileResponse build the Content-Disposition header
    # itself, with quoting/encoding for names containing spaces or non-ASCII
    # characters; the previous hand-built header was unquoted.
    return FileResponse(
        path=video_path,
        filename=target.name,
        media_type=f"video/{target.suffix[1:]}",
    )
|
||||
# @router.post(
|
||||
# "/musics",
|
||||
# response_model=BgmUploadResponse,
|
||||
# summary="Upload the BGM file to the songs directory",
|
||||
# )
|
||||
# def upload_bgm_file(request: Request, file: UploadFile = File(...)):
|
||||
# request_id = base.get_task_id(request)
|
||||
# # check file ext
|
||||
# if file.filename.endswith("mp3"):
|
||||
# song_dir = utils.song_dir()
|
||||
# save_path = os.path.join(song_dir, file.filename)
|
||||
# # save file
|
||||
# with open(save_path, "wb+") as buffer:
|
||||
# # If the file already exists, it will be overwritten
|
||||
# file.file.seek(0)
|
||||
# buffer.write(file.file.read())
|
||||
# response = {"file": save_path}
|
||||
# return utils.get_response(200, response)
|
||||
#
|
||||
# raise HttpException(
|
||||
# "", status_code=400, message=f"{request_id}: Only *.mp3 files can be uploaded"
|
||||
# )
|
||||
#
|
||||
#
|
||||
# @router.get("/stream/{file_path:path}")
|
||||
# async def stream_video(request: Request, file_path: str):
|
||||
# tasks_dir = utils.task_dir()
|
||||
# video_path = os.path.join(tasks_dir, file_path)
|
||||
# range_header = request.headers.get("Range")
|
||||
# video_size = os.path.getsize(video_path)
|
||||
# start, end = 0, video_size - 1
|
||||
#
|
||||
# length = video_size
|
||||
# if range_header:
|
||||
# range_ = range_header.split("bytes=")[1]
|
||||
# start, end = [int(part) if part else None for part in range_.split("-")]
|
||||
# if start is None:
|
||||
# start = video_size - end
|
||||
# end = video_size - 1
|
||||
# if end is None:
|
||||
# end = video_size - 1
|
||||
# length = end - start + 1
|
||||
#
|
||||
# def file_iterator(file_path, offset=0, bytes_to_read=None):
|
||||
# with open(file_path, "rb") as f:
|
||||
# f.seek(offset, os.SEEK_SET)
|
||||
# remaining = bytes_to_read or video_size
|
||||
# while remaining > 0:
|
||||
# bytes_to_read = min(4096, remaining)
|
||||
# data = f.read(bytes_to_read)
|
||||
# if not data:
|
||||
# break
|
||||
# remaining -= len(data)
|
||||
# yield data
|
||||
#
|
||||
# response = StreamingResponse(
|
||||
# file_iterator(video_path, start, length), media_type="video/mp4"
|
||||
# )
|
||||
# response.headers["Content-Range"] = f"bytes {start}-{end}/{video_size}"
|
||||
# response.headers["Accept-Ranges"] = "bytes"
|
||||
# response.headers["Content-Length"] = str(length)
|
||||
# response.status_code = 206 # Partial Content
|
||||
#
|
||||
# return response
|
||||
#
|
||||
#
|
||||
# @router.get("/download/{file_path:path}")
|
||||
# async def download_video(_: Request, file_path: str):
|
||||
# """
|
||||
# download video
|
||||
# :param _: Request request
|
||||
# :param file_path: video file path, eg: /cd1727ed-3473-42a2-a7da-4faafafec72b/final-1.mp4
|
||||
# :return: video file
|
||||
# """
|
||||
# tasks_dir = utils.task_dir()
|
||||
# video_path = os.path.join(tasks_dir, file_path)
|
||||
# file_path = pathlib.Path(video_path)
|
||||
# filename = file_path.stem
|
||||
# extension = file_path.suffix
|
||||
# headers = {"Content-Disposition": f"attachment; filename={filename}{extension}"}
|
||||
# return FileResponse(
|
||||
# path=video_path,
|
||||
# headers=headers,
|
||||
# filename=f"{filename}{extension}",
|
||||
# media_type=f"video/{extension[1:]}",
|
||||
# )
|
||||
|
||||
11
app/controllers/v2/base.py
Normal file
11
app/controllers/v2/base.py
Normal file
@ -0,0 +1,11 @@
|
||||
from fastapi import APIRouter, Depends
|
||||
|
||||
|
||||
def v2_router(dependencies=None):
    """Create an ``APIRouter`` tagged "V2" and mounted under ``/api/v2``.

    :param dependencies: optional dependencies assigned to the router so they
        apply to all of its routes (e.g. authentication); ignored when falsy
    :return: the configured router
    """
    router = APIRouter()
    router.tags, router.prefix = ["V2"], "/api/v2"
    if not dependencies:
        return router
    # Apply the supplied dependencies to every route of this router.
    router.dependencies = dependencies
    return router
|
||||
170
app/controllers/v2/script.py
Normal file
170
app/controllers/v2/script.py
Normal file
@ -0,0 +1,170 @@
|
||||
from fastapi import APIRouter, BackgroundTasks
|
||||
from loguru import logger
|
||||
import os
|
||||
|
||||
from app.models.schema_v2 import (
|
||||
GenerateScriptRequest,
|
||||
GenerateScriptResponse,
|
||||
CropVideoRequest,
|
||||
CropVideoResponse,
|
||||
DownloadVideoRequest,
|
||||
DownloadVideoResponse,
|
||||
StartSubclipRequest,
|
||||
StartSubclipResponse
|
||||
)
|
||||
from app.models.schema import VideoClipParams
|
||||
from app.services.script_service import ScriptGenerator
|
||||
from app.services.video_service import VideoService
|
||||
from app.utils import utils
|
||||
from app.controllers.v2.base import v2_router
|
||||
from app.models.schema import VideoClipParams
|
||||
from app.services.youtube_service import YoutubeService
|
||||
from app.services import task as task_service
|
||||
|
||||
router = v2_router()
|
||||
|
||||
|
||||
@router.post(
    "/scripts/generate",
    response_model=GenerateScriptResponse,
    summary="同步请求;生成视频脚本 (V2)"
)
async def generate_script(
    request: GenerateScriptRequest,
    background_tasks: BackgroundTasks
):
    """
    V2 API: generate a video script, handled synchronously within the request.
    """
    task_id = utils.get_uuid()

    try:
        script = await ScriptGenerator().generate_script(
            video_path=request.video_path,
            video_theme=request.video_theme,
            custom_prompt=request.custom_prompt,
            skip_seconds=request.skip_seconds,
            threshold=request.threshold,
            vision_batch_size=request.vision_batch_size,
            vision_llm_provider=request.vision_llm_provider
        )
    except Exception as e:
        # Log with traceback, then let the error propagate unchanged.
        logger.exception(f"Generate script failed: {str(e)}")
        raise

    return {"task_id": task_id, "script": script}
|
||||
|
||||
|
||||
@router.post(
    "/scripts/crop",
    response_model=CropVideoResponse,
    summary="同步请求;裁剪视频 (V2)"
)
async def crop_video(
    request: CropVideoRequest,
    background_tasks: BackgroundTasks
):
    """
    V2 API: crop the source video according to the supplied script.
    """
    try:
        # Delegate the cropping to the video service.
        service = VideoService()
        task_id, subclip_videos = await service.crop_video(
            video_path=request.video_origin_path,
            video_script=request.video_script
        )
        logger.debug(f"裁剪视频成功,视频片段路径: {subclip_videos}")
        logger.debug(type(subclip_videos))
    except Exception as e:
        # Log with traceback, then let the error propagate unchanged.
        logger.exception(f"Crop video failed: {str(e)}")
        raise

    return {"task_id": task_id, "subclip_videos": subclip_videos}
|
||||
|
||||
|
||||
@router.post(
    "/youtube/download",
    response_model=DownloadVideoResponse,
    summary="同步请求;下载YouTube视频 (V2)"
)
async def download_youtube_video(
    request: DownloadVideoRequest,
    background_tasks: BackgroundTasks
):
    """
    V2 API: download a YouTube video at the requested resolution.
    """
    try:
        service = YoutubeService()
        task_id, output_path, filename = await service.download_video(
            url=request.url,
            resolution=request.resolution,
            output_format=request.output_format,
            rename=request.rename
        )
        # Resolution and format are echoed back from the request.
        result = {
            "task_id": task_id,
            "output_path": output_path,
            "resolution": request.resolution,
            "format": request.output_format,
            "filename": filename
        }
    except Exception as e:
        # Log with traceback, then let the error propagate unchanged.
        logger.exception(f"Download YouTube video failed: {str(e)}")
        raise

    return result
|
||||
|
||||
|
||||
@router.post(
    "/scripts/start-subclip",
    response_model=StartSubclipResponse,
    summary="异步请求;开始视频剪辑任务 (V2)"
)
async def start_subclip(
    request: VideoClipParams,
    task_id: str,
    subclip_videos: dict,
    background_tasks: BackgroundTasks
):
    """
    V2 API: start a video clipping task that runs as a background task.
    """
    try:
        # Build the parameter object from a fixed subset of the request fields.
        # NOTE(review): any other VideoClipParams field sent by the caller is
        # not copied here — presumably it falls back to its default; confirm
        # this is intended.
        copied_fields = (
            "video_origin_path",
            "video_clip_json_path",
            "voice_name",
            "voice_rate",
            "voice_pitch",
            "subtitle_enabled",
            "video_aspect",
            "n_threads",
        )
        params = VideoClipParams(
            **{name: getattr(request, name) for name in copied_fields}
        )

        # Run the clipping in a background task.
        background_tasks.add_task(
            task_service.start_subclip,
            task_id=task_id,
            params=params,
            subclip_path_videos=subclip_videos
        )
    except Exception as e:
        # Log with traceback, then let the error propagate unchanged.
        logger.exception(f"Start subclip task failed: {str(e)}")
        raise

    # "PROCESSING" is the initial state reported to the caller.
    return {"task_id": task_id, "state": "PROCESSING"}
|
||||
@ -366,6 +366,8 @@ class VideoClipParams(BaseModel):
|
||||
custom_position: float = Field(default=70.0, description="自定义位置")
|
||||
|
||||
n_threads: Optional[int] = 8 # 线程数,有助于提升视频处理速度
|
||||
tts_volume: float = 1.0 # TTS音频音量
|
||||
video_volume: float = 0.1 # 视频原声音量
|
||||
|
||||
class VideoTranscriptionRequest(BaseModel):
|
||||
video_name: str
|
||||
|
||||
62
app/models/schema_v2.py
Normal file
62
app/models/schema_v2.py
Normal file
@ -0,0 +1,62 @@
|
||||
from typing import Optional, List
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
# Request body of the V2 "generate script" endpoint; the fields are forwarded
# one-to-one to ScriptGenerator.generate_script.
class GenerateScriptRequest(BaseModel):
    video_path: str                                 # path of the video file
    video_theme: Optional[str] = ""                 # video theme
    custom_prompt: Optional[str] = ""               # custom prompt text
    skip_seconds: Optional[int] = 0                 # seconds to skip at the start
    threshold: Optional[int] = 30                   # presumably the keyframe difference threshold — confirm
    vision_batch_size: Optional[int] = 5            # batch size for vision processing
    vision_llm_provider: Optional[str] = "gemini"   # vision model provider
|
||||
|
||||
|
||||
# Response of the V2 "generate script" endpoint.
class GenerateScriptResponse(BaseModel):
    task_id: str         # id generated by the endpoint for this request
    script: List[dict]   # the generated script entries
|
||||
|
||||
|
||||
# Request body of the V2 "crop video" endpoint.
class CropVideoRequest(BaseModel):
    video_origin_path: str     # path of the source video to crop
    video_script: List[dict]   # script entries that drive the cropping
|
||||
|
||||
|
||||
# Response of the V2 "crop video" endpoint.
class CropVideoResponse(BaseModel):
    task_id: str           # task id returned by the video service
    subclip_videos: dict   # cropped clips as returned by the video service
|
||||
|
||||
|
||||
# Request body of the V2 "download YouTube video" endpoint; the fields are
# forwarded to YoutubeService.download_video.
class DownloadVideoRequest(BaseModel):
    url: str                              # video URL
    resolution: str                       # requested resolution
    output_format: Optional[str] = "mp4"  # output container format
    rename: Optional[str] = None          # optional replacement file name
|
||||
|
||||
|
||||
# Response of the V2 "download YouTube video" endpoint.
class DownloadVideoResponse(BaseModel):
    task_id: str       # task id returned by the YouTube service
    output_path: str   # output path returned by the YouTube service
    resolution: str    # echoed from the request
    format: str        # echoed from the request's output_format
    filename: str      # file name returned by the YouTube service
|
||||
|
||||
|
||||
# Parameters for starting a clipping task.
class StartSubclipRequest(BaseModel):
    task_id: str
    video_origin_path: str
    video_clip_json_path: str
    voice_name: Optional[str] = None
    voice_rate: Optional[int] = 0
    voice_pitch: Optional[int] = 0
    subtitle_enabled: Optional[bool] = True
    video_aspect: Optional[str] = "16:9"
    n_threads: Optional[int] = 4
    # Dict of video clips obtained from the crop-video endpoint. Typed as dict
    # (was list) to agree with CropVideoResponse.subclip_videos and with the
    # start_subclip endpoint, which both use a dict.
    subclip_videos: dict
|
||||
|
||||
|
||||
# Response of the V2 "start subclip" endpoint.
class StartSubclipResponse(BaseModel):
    task_id: str                                  # id of the clipping task
    state: str                                    # task state, e.g. "PROCESSING"
    videos: Optional[List[str]] = None            # optional list of video paths
    combined_videos: Optional[List[str]] = None   # optional list of combined video paths
|
||||
@ -10,8 +10,12 @@ Resources:
|
||||
from fastapi import APIRouter
|
||||
|
||||
from app.controllers.v1 import llm, video
|
||||
from app.controllers.v2 import script
|
||||
|
||||
root_api_router = APIRouter()
|
||||
# v1
|
||||
root_api_router.include_router(video.router)
|
||||
root_api_router.include_router(llm.router)
|
||||
|
||||
# v2
|
||||
root_api_router.include_router(script.router)
|
||||
|
||||
@ -18,95 +18,119 @@ def check_ffmpeg():
|
||||
return False
|
||||
|
||||
|
||||
def merge_audio_files(task_id: str, audio_file_paths: List[str], total_duration: int, video_script: list):
|
||||
def merge_audio_files(task_id: str, audio_files: list, total_duration: float, list_script: list):
|
||||
"""
|
||||
合并多个音频文件到一个指定总时长的音频文件中,并生成相应的字幕
|
||||
:param task_id: 任务ID
|
||||
:param audio_file_paths: 音频文件路径列表
|
||||
:param total_duration: 最终音频文件的总时长(秒)
|
||||
:param video_script: JSON格式的视频脚本
|
||||
合并音频文件,根据OST设置处理不同的音频轨道
|
||||
|
||||
Args:
|
||||
task_id: 任务ID
|
||||
audio_files: TTS生成的音频文件列表
|
||||
total_duration: 总时长
|
||||
list_script: 完整脚本信息,包含OST设置
|
||||
|
||||
Returns:
|
||||
str: 合并后的音频文件路径
|
||||
"""
|
||||
output_dir = utils.task_dir(task_id)
|
||||
|
||||
# 检查FFmpeg是否安装
|
||||
if not check_ffmpeg():
|
||||
logger.error("错误:FFmpeg未安装。请安装FFmpeg后再运行此脚本。")
|
||||
return None, None
|
||||
logger.error("FFmpeg未安装,无法合并音频文件")
|
||||
return None
|
||||
|
||||
# 创建一个总时长为total_duration的空白音频
|
||||
blank_audio = AudioSegment.silent(duration=total_duration * 1000) # pydub使用毫秒
|
||||
# 创建一个空的音频片段
|
||||
final_audio = AudioSegment.silent(duration=total_duration * 1000) # 总时长以毫秒为单位
|
||||
|
||||
for audio_path in audio_file_paths:
|
||||
if not os.path.exists(audio_path):
|
||||
logger.info(f"警告:文件 {audio_path} 不存在,已跳过。")
|
||||
# 遍历脚本中的每个片段
|
||||
for segment, audio_file in zip(list_script, audio_files):
|
||||
try:
|
||||
# 加载TTS音频文件
|
||||
tts_audio = AudioSegment.from_file(audio_file)
|
||||
|
||||
# 获取片段的开始和结束时间
|
||||
start_time, end_time = segment['new_timestamp'].split('-')
|
||||
start_seconds = utils.time_to_seconds(start_time)
|
||||
end_seconds = utils.time_to_seconds(end_time)
|
||||
|
||||
# 根据OST设置处理音频
|
||||
if segment['OST'] == 0:
|
||||
# 只使用TTS音频
|
||||
final_audio = final_audio.overlay(tts_audio, position=start_seconds * 1000)
|
||||
elif segment['OST'] == 1:
|
||||
# 只使用原声(假设原声已经在视频中)
|
||||
continue
|
||||
elif segment['OST'] == 2:
|
||||
# 混合TTS音频和原声
|
||||
original_audio = AudioSegment.silent(duration=(end_seconds - start_seconds) * 1000)
|
||||
mixed_audio = original_audio.overlay(tts_audio)
|
||||
final_audio = final_audio.overlay(mixed_audio, position=start_seconds * 1000)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"处理音频文件 {audio_file} 时出错: {str(e)}")
|
||||
continue
|
||||
|
||||
# 从文件名中提取时间戳
|
||||
filename = os.path.basename(audio_path)
|
||||
start_time, end_time = extract_timestamp(filename)
|
||||
# 保存合并后的音频文件
|
||||
output_audio_path = os.path.join(utils.task_dir(task_id), "final_audio.mp3")
|
||||
final_audio.export(output_audio_path, format="mp3")
|
||||
logger.info(f"合并后的音频文件已保存: {output_audio_path}")
|
||||
|
||||
# 读取音频文件
|
||||
try:
|
||||
audio = AudioSegment.from_mp3(audio_path)
|
||||
except Exception as e:
|
||||
logger.error(f"错误:无法读取文件 {audio_path}。错误信息:{str(e)}")
|
||||
continue
|
||||
|
||||
# 将音频插入到空白音频的指定位置
|
||||
blank_audio = blank_audio.overlay(audio, position=start_time * 1000)
|
||||
|
||||
# 尝试导出为WAV格式
|
||||
try:
|
||||
output_file = os.path.join(output_dir, "audio.wav")
|
||||
blank_audio.export(output_file, format="wav")
|
||||
logger.info(f"音频合并完成,已保存为 {output_file}")
|
||||
except Exception as e:
|
||||
logger.info(f"导出为WAV格式失败,尝试使用MP3格式:{str(e)}")
|
||||
try:
|
||||
output_file = os.path.join(output_dir, "audio.mp3")
|
||||
blank_audio.export(output_file, format="mp3", codec="libmp3lame")
|
||||
logger.info(f"音频合并完成,已保存为 {output_file}")
|
||||
except Exception as e:
|
||||
logger.error(f"导出音频失败:{str(e)}")
|
||||
return None, None
|
||||
|
||||
return output_file
|
||||
|
||||
def parse_timestamp(timestamp: str):
|
||||
"""解析时间戳字符串为秒数"""
|
||||
# 确保使用冒号作为分隔符
|
||||
timestamp = timestamp.replace('_', ':')
|
||||
return time_to_seconds(timestamp)
|
||||
|
||||
def extract_timestamp(filename):
|
||||
"""从文件名中提取开始和结束时间戳"""
|
||||
# 从 "audio_00_06-00_24.mp3" 这样的格式中提取时间
|
||||
time_part = filename.split('_', 1)[1].split('.')[0] # 获取 "00_06-00_24" 部分
|
||||
start_time, end_time = time_part.split('-') # 分割成 "00_06" 和 "00_24"
|
||||
|
||||
# 将下划线格式转换回冒号格式
|
||||
start_time = start_time.replace('_', ':')
|
||||
end_time = end_time.replace('_', ':')
|
||||
|
||||
# 将时间戳转换为秒
|
||||
start_seconds = time_to_seconds(start_time)
|
||||
end_seconds = time_to_seconds(end_time)
|
||||
|
||||
return start_seconds, end_seconds
|
||||
return output_audio_path
|
||||
|
||||
|
||||
def time_to_seconds(time_str):
|
||||
"""将 "00:06" 或 "00_06" 格式转换为总秒数"""
|
||||
# 确保使用冒号作为分隔符
|
||||
time_str = time_str.replace('_', ':')
|
||||
"""
|
||||
将时间字符串转换为秒数,支持多种格式:
|
||||
1. 'HH:MM:SS,mmm' (时:分:秒,毫秒)
|
||||
2. 'MM:SS,mmm' (分:秒,毫秒)
|
||||
3. 'SS,mmm' (秒,毫秒)
|
||||
"""
|
||||
try:
|
||||
parts = time_str.split(':')
|
||||
if len(parts) != 2:
|
||||
logger.error(f"Invalid time format: {time_str}")
|
||||
return 0
|
||||
return int(parts[0]) * 60 + int(parts[1])
|
||||
# 处理毫秒部分
|
||||
if ',' in time_str:
|
||||
time_part, ms_part = time_str.split(',')
|
||||
ms = float(ms_part) / 1000
|
||||
else:
|
||||
time_part = time_str
|
||||
ms = 0
|
||||
|
||||
# 分割时间部分
|
||||
parts = time_part.split(':')
|
||||
|
||||
if len(parts) == 3: # HH:MM:SS
|
||||
h, m, s = map(int, parts)
|
||||
seconds = h * 3600 + m * 60 + s
|
||||
elif len(parts) == 2: # MM:SS
|
||||
m, s = map(int, parts)
|
||||
seconds = m * 60 + s
|
||||
else: # SS
|
||||
seconds = int(parts[0])
|
||||
|
||||
return seconds + ms
|
||||
except (ValueError, IndexError) as e:
|
||||
logger.error(f"Error parsing time {time_str}: {str(e)}")
|
||||
return 0
|
||||
return 0.0
|
||||
|
||||
|
||||
def extract_timestamp(filename):
    """
    Extract the start and end timestamps encoded in an audio file name.

    Example: "audio_00_06,500-00_24,800.mp3" -> (6.5, 24.8)

    Returns (0.0, 0.0), after logging the error, when the name cannot be parsed.
    """
    try:
        # Keep what lies between the first "_" and the first following ".",
        # e.g. "00_06,500-00_24,800".
        stamp = filename.split('_', 1)[1].split('.')[0]
        raw_start, raw_end = stamp.split('-')

        # Restore the colon separators, then convert each bound to seconds.
        start_seconds, end_seconds = (
            time_to_seconds(bound.replace('_', ':'))
            for bound in (raw_start, raw_end)
        )
        return start_seconds, end_seconds
    except Exception as e:
        logger.error(f"Error extracting timestamp from {filename}: {str(e)}")
        return 0.0, 0.0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@ -3,6 +3,7 @@ import subprocess
|
||||
import random
|
||||
import traceback
|
||||
from urllib.parse import urlencode
|
||||
from datetime import datetime
|
||||
|
||||
import requests
|
||||
from typing import List
|
||||
@ -254,70 +255,105 @@ def download_videos(
|
||||
def time_to_seconds(time_str: str) -> float:
|
||||
"""
|
||||
将时间字符串转换为秒数
|
||||
支持格式:
|
||||
1. "MM:SS" (分:秒)
|
||||
2. "SS" (纯秒数)
|
||||
支持格式: 'HH:MM:SS,mmm' (时:分:秒,毫秒)
|
||||
|
||||
Args:
|
||||
time_str: 时间字符串,如 "00:00:20,100"
|
||||
|
||||
Returns:
|
||||
float: 转换后的秒数(包含毫秒)
|
||||
"""
|
||||
parts = time_str.split(':')
|
||||
if len(parts) == 2:
|
||||
minutes, seconds = map(float, parts)
|
||||
return minutes * 60 + seconds
|
||||
return float(time_str)
|
||||
try:
|
||||
# 处理毫秒部分
|
||||
if ',' in time_str:
|
||||
time_part, ms_part = time_str.split(',')
|
||||
ms = int(ms_part) / 1000
|
||||
else:
|
||||
time_part = time_str
|
||||
ms = 0
|
||||
|
||||
# 处理时分秒
|
||||
parts = time_part.split(':')
|
||||
if len(parts) == 3: # HH:MM:SS
|
||||
h, m, s = map(int, parts)
|
||||
seconds = h * 3600 + m * 60 + s
|
||||
else:
|
||||
raise ValueError("时间格式必须为 HH:MM:SS,mmm")
|
||||
|
||||
return seconds + ms
|
||||
|
||||
except ValueError as e:
|
||||
logger.error(f"时间格式错误: {time_str}")
|
||||
raise ValueError(f"时间格式错误: 必须为 HH:MM:SS,mmm 格式") from e
|
||||
|
||||
|
||||
def format_timestamp(seconds: float) -> str:
|
||||
"""
|
||||
将秒数转换为 "MM:SS" 格式的时间字符串
|
||||
将秒数转换为可读的时间格式 (HH:MM:SS,mmm)
|
||||
|
||||
Args:
|
||||
seconds: 秒数(可包含毫秒)
|
||||
|
||||
Returns:
|
||||
str: 格式化的时间字符串,如 "00:00:20,100"
|
||||
"""
|
||||
minutes = int(seconds) // 60
|
||||
secs = int(seconds) % 60
|
||||
return f"{minutes:02d}:{secs:02d}"
|
||||
hours = int(seconds // 3600)
|
||||
minutes = int((seconds % 3600) // 60)
|
||||
seconds_remain = seconds % 60
|
||||
whole_seconds = int(seconds_remain)
|
||||
milliseconds = int((seconds_remain - whole_seconds) * 1000)
|
||||
|
||||
return f"{hours:02d}:{minutes:02d}:{whole_seconds:02d},{milliseconds:03d}"
|
||||
|
||||
|
||||
def save_clip_video(timestamp: str, origin_video: str, save_dir: str = "") -> dict:
|
||||
"""
|
||||
保存剪辑后的视频
|
||||
|
||||
Args:
|
||||
timestamp: 需要裁剪的单个时间戳,支持两种格式:
|
||||
1. '00:36-00:40' (分:秒-分:秒)
|
||||
2. 'SS-SS' (秒-秒)
|
||||
timestamp: 需要裁剪的时间戳,格式为 'HH:MM:SS,mmm-HH:MM:SS,mmm'
|
||||
例如: '00:00:00,000-00:00:20,100'
|
||||
origin_video: 原视频路径
|
||||
save_dir: 存储目录
|
||||
|
||||
Returns:
|
||||
裁剪后的视频路径,格式为 {timestamp: video_path}
|
||||
dict: 裁剪后的视频路径,格式为 {timestamp: video_path}
|
||||
"""
|
||||
# 使用新的路径结构
|
||||
if not save_dir:
|
||||
save_dir = utils.storage_dir("cache_videos")
|
||||
base_dir = os.path.join(utils.temp_dir(), "clip_video")
|
||||
video_hash = utils.md5(origin_video)
|
||||
save_dir = os.path.join(base_dir, video_hash)
|
||||
|
||||
if not os.path.exists(save_dir):
|
||||
os.makedirs(save_dir)
|
||||
|
||||
video_id = f"vid-{timestamp.replace(':', '_')}"
|
||||
video_path = f"{save_dir}/{video_id}.mp4"
|
||||
# 生成更规范的视频文件名
|
||||
video_id = f"vid-{timestamp.replace(':', '-').replace(',', '_')}"
|
||||
video_path = os.path.join(save_dir, f"{video_id}.mp4")
|
||||
|
||||
if os.path.exists(video_path) and os.path.getsize(video_path) > 0:
|
||||
logger.info(f"video already exists: {video_path}")
|
||||
return {timestamp: video_path}
|
||||
|
||||
try:
|
||||
# 先加载视频获取总时长
|
||||
# 加载视频获取总时长
|
||||
video = VideoFileClip(origin_video)
|
||||
total_duration = video.duration
|
||||
|
||||
# 获取目标时间段
|
||||
# 解析时间戳
|
||||
start_str, end_str = timestamp.split('-')
|
||||
start = time_to_seconds(start_str)
|
||||
end = time_to_seconds(end_str)
|
||||
|
||||
# 验证时间段是否有效
|
||||
# 验证时间段
|
||||
if start >= total_duration:
|
||||
logger.warning(f"起始时间 {format_timestamp(start)} ({start:.2f}秒) 超出视频总时长 {format_timestamp(total_duration)} ({total_duration:.2f}秒)")
|
||||
logger.warning(f"起始时间 {format_timestamp(start)} ({start:.3f}秒) 超出视频总时长 {format_timestamp(total_duration)} ({total_duration:.3f}秒)")
|
||||
video.close()
|
||||
return {}
|
||||
|
||||
if end > total_duration:
|
||||
logger.warning(f"结束时间 {format_timestamp(end)} ({end:.2f}秒) 超出视频总时长 {format_timestamp(total_duration)} ({total_duration:.2f}秒),将自动调整为视频结尾")
|
||||
logger.warning(f"结束时间 {format_timestamp(end)} ({end:.3f}秒) 超出视频总时长 {format_timestamp(total_duration)} ({total_duration:.3f}秒),将自动调整为视频结尾")
|
||||
end = total_duration
|
||||
|
||||
if end <= start:
|
||||
@ -328,11 +364,21 @@ def save_clip_video(timestamp: str, origin_video: str, save_dir: str = "") -> di
|
||||
# 剪辑视频
|
||||
duration = end - start
|
||||
logger.info(f"开始剪辑视频: {format_timestamp(start)} - {format_timestamp(end)},时长 {format_timestamp(duration)}")
|
||||
|
||||
# 剪辑视频
|
||||
subclip = video.subclip(start, end)
|
||||
|
||||
try:
|
||||
# 检查视频是否有音频轨道并写入文件
|
||||
subclip.write_videofile(video_path, audio=(subclip.audio is not None), logger=None)
|
||||
subclip.write_videofile(
|
||||
video_path,
|
||||
codec='libx264',
|
||||
audio_codec='aac',
|
||||
temp_audiofile='temp-audio.m4a',
|
||||
remove_temp=True,
|
||||
audio=(subclip.audio is not None),
|
||||
logger=None
|
||||
)
|
||||
|
||||
# 验证生成的视频文件
|
||||
if os.path.exists(video_path) and os.path.getsize(video_path) > 0:
|
||||
@ -363,12 +409,12 @@ def save_clip_video(timestamp: str, origin_video: str, save_dir: str = "") -> di
|
||||
return {}
|
||||
|
||||
|
||||
def clip_videos(task_id: str, timestamp_terms: List[str], origin_video: str, progress_callback=None):
|
||||
def clip_videos(task_id: str, timestamp_terms: List[str], origin_video: str, progress_callback=None) -> dict:
|
||||
"""
|
||||
剪辑视频
|
||||
Args:
|
||||
task_id: 任务id
|
||||
timestamp_terms: 需要剪辑的时间戳列表,如:['00:00-00:20', '00:36-00:40', '07:07-07:22']
|
||||
timestamp_terms: 需要剪辑的时间戳列表,如:['00:00:00,000-00:00:20,100', '00:00:43,039-00:00:46,959']
|
||||
origin_video: 原视频路径
|
||||
progress_callback: 进度回调函数
|
||||
|
||||
@ -379,11 +425,6 @@ def clip_videos(task_id: str, timestamp_terms: List[str], origin_video: str, pro
|
||||
total_items = len(timestamp_terms)
|
||||
for index, item in enumerate(timestamp_terms):
|
||||
material_directory = config.app.get("material_directory", "").strip()
|
||||
if material_directory == "task":
|
||||
material_directory = utils.task_dir(task_id)
|
||||
elif material_directory and not os.path.isdir(material_directory):
|
||||
material_directory = ""
|
||||
|
||||
try:
|
||||
saved_video_path = save_clip_video(timestamp=item, origin_video=origin_video, save_dir=material_directory)
|
||||
if saved_video_path:
|
||||
@ -396,6 +437,7 @@ def clip_videos(task_id: str, timestamp_terms: List[str], origin_video: str, pro
|
||||
except Exception as e:
|
||||
logger.error(f"视频裁剪失败: {utils.to_json(item)} =>\n{str(traceback.format_exc())}")
|
||||
return {}
|
||||
|
||||
logger.success(f"裁剪 {len(video_paths)} videos")
|
||||
return video_paths
|
||||
|
||||
@ -455,29 +497,3 @@ def merge_videos(video_paths, ost_list):
|
||||
os.remove(silent_video)
|
||||
|
||||
return output_file
|
||||
|
||||
|
||||
# 使用示例
|
||||
# if __name__ == "__main__":
|
||||
# video_paths = ['/Users/apple/Desktop/home/NarratoAI/storage/cache_videos/vid-01_17-01_37.mp4', '/Users/apple/Desktop/home/NarratoAI/storage/cache_videos/vid-00_00-00_06.mp4',
|
||||
# '/Users/apple/Desktop/home/NarratoAI/storage/cache_videos/vid-00_06-00_09.mp4', '/Users/apple/Desktop/home/NarratoAI/storage/cache_videos/vid-01_03-01_10.mp4',
|
||||
# '/Users/apple/Desktop/home/NarratoAI/storage/cache_videos/vid-01_10-01_17.mp4', '/Users/apple/Desktop/home/NarratoAI/storage/cache_videos/vid-00_24-00_27.mp4',
|
||||
# '/Users/apple/Desktop/home/NarratoAI/storage/cache_videos/vid-01_28-01_36.mp4', '/Users/apple/Desktop/home/NarratoAI/storage/cache_videos/vid-00_32-00_41.mp4',
|
||||
# '/Users/apple/Desktop/home/NarratoAI/storage/cache_videos/vid-01_36-01_58.mp4', '/Users/apple/Desktop/home/NarratoAI/storage/cache_videos/vid-00_12-00_15.mp4',
|
||||
# '/Users/apple/Desktop/home/NarratoAI/storage/cache_videos/vid-00_09-00_12.mp4', '/Users/apple/Desktop/home/NarratoAI/storage/cache_videos/vid-02_12-02_25.mp4',
|
||||
# '/Users/apple/Desktop/home/NarratoAI/storage/cache_videos/vid-02_03-02_12.mp4', '/Users/apple/Desktop/home/NarratoAI/storage/cache_videos/vid-01_58-02_03.mp4',
|
||||
# '/Users/apple/Desktop/home/NarratoAI/storage/cache_videos/vid-03_14-03_18.mp4', '/Users/apple/Desktop/home/NarratoAI/storage/cache_videos/vid-03_18-03_20.mp4']
|
||||
#
|
||||
# ost_list = [True, False, False, False, False, False, False, False, True, False, False, False, False, False, False,
|
||||
# False]
|
||||
#
|
||||
# result = merge_videos(video_paths, ost_list)
|
||||
# if result:
|
||||
# print(f"合并后的视频文件:{result}")
|
||||
# else:
|
||||
# print("视频合并失败")
|
||||
#
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
save_clip_video('00:50-01:41', 'E:\\projects\\NarratoAI\\resource\\videos\\WeChat_20241110144511.mp4')
|
||||
|
||||
405
app/services/script_service.py
Normal file
405
app/services/script_service.py
Normal file
@ -0,0 +1,405 @@
|
||||
import os
|
||||
import json
|
||||
import time
|
||||
import asyncio
|
||||
import requests
|
||||
from loguru import logger
|
||||
from typing import List, Dict, Any, Callable
|
||||
|
||||
from app.utils import utils, gemini_analyzer, video_processor, video_processor_v2
|
||||
from app.utils.script_generator import ScriptProcessor
|
||||
from app.config import config
|
||||
|
||||
|
||||
class ScriptGenerator:
    """Build a narration script for a video from its extracted keyframes.

    The pipeline is: extract (or reuse cached) keyframes, send them to a
    vision provider ("gemini" or "narratoapi"), and return the resulting
    script as a list of dictionaries.
    """

    def __init__(self):
        self.temp_dir = utils.temp_dir()
        self.keyframes_dir = os.path.join(self.temp_dir, "keyframes")

    @staticmethod
    def _noop_progress(progress: float, message: str) -> None:
        """Default progress callback that ignores every update."""
        return None

    @staticmethod
    def _list_keyframes(directory: str) -> List[str]:
        """Return the sorted full paths of all ``.jpg`` files in *directory*."""
        return [
            os.path.join(directory, filename)
            for filename in sorted(os.listdir(directory))
            if filename.endswith('.jpg')
        ]

    async def generate_script(
        self,
        video_path: str,
        video_theme: str = "",
        custom_prompt: str = "",
        skip_seconds: int = 0,
        threshold: int = 30,
        vision_batch_size: int = 5,
        vision_llm_provider: str = "gemini",
        progress_callback: Callable[[float, str], None] = None
    ) -> List[Dict[Any, Any]]:
        """Generate the video script (core entry point).

        Args:
            video_path: Path of the video file.
            video_theme: Theme of the video.
            custom_prompt: Custom prompt forwarded to the script generator.
            skip_seconds: Number of seconds to skip at the start of the video.
            threshold: Frame difference threshold used for keyframe extraction.
            vision_batch_size: Number of keyframes per vision batch.
            vision_llm_provider: Vision provider, "gemini" or "narratoapi".
            progress_callback: Optional ``callback(progress, message)``;
                a no-op is used when omitted.

        Returns:
            The generated script. A string result from the provider is
            decoded with ``json.loads``; any other result is returned as is.

        Raises:
            ValueError: If ``vision_llm_provider`` is not supported.
            Exception: Any failure from extraction or the provider is logged
                and re-raised unchanged.
        """
        if progress_callback is None:
            progress_callback = self._noop_progress

        try:
            # Extract keyframes
            progress_callback(10, "正在提取关键帧...")
            keyframe_files = await self._extract_keyframes(
                video_path,
                skip_seconds,
                threshold
            )

            if vision_llm_provider == "gemini":
                script = await self._process_with_gemini(
                    keyframe_files,
                    video_theme,
                    custom_prompt,
                    vision_batch_size,
                    progress_callback
                )
            elif vision_llm_provider == "narratoapi":
                script = await self._process_with_narrato(
                    keyframe_files,
                    video_theme,
                    custom_prompt,
                    vision_batch_size,
                    progress_callback
                )
            else:
                raise ValueError(f"Unsupported vision provider: {vision_llm_provider}")

            return json.loads(script) if isinstance(script, str) else script

        except Exception:
            logger.exception("Generate script failed")
            raise

    async def _extract_keyframes(
        self,
        video_path: str,
        skip_seconds: int,
        threshold: int
    ) -> List[str]:
        """Extract the keyframes of a video, reusing a cached result if present.

        The cache directory is keyed by an MD5 of the video path plus its
        modification time.
        NOTE(review): ``skip_seconds`` and ``threshold`` are not part of the
        cache key, so cached frames are reused even when they change —
        confirm this is intended.

        Returns:
            Sorted list of keyframe ``.jpg`` paths.

        Raises:
            Exception: Extraction errors are re-raised after the partially
                filled cache directory has been removed.
        """
        video_hash = utils.md5(video_path + str(os.path.getmtime(video_path)))
        video_keyframes_dir = os.path.join(self.keyframes_dir, video_hash)

        # Check the cache first
        if os.path.exists(video_keyframes_dir):
            cached_files = self._list_keyframes(video_keyframes_dir)
            if cached_files:
                logger.info(f"Using cached keyframes: {video_keyframes_dir}")
                return cached_files

        # Extract fresh keyframes
        os.makedirs(video_keyframes_dir, exist_ok=True)

        try:
            if config.frames.get("version") == "v2":
                processor = video_processor_v2.VideoProcessor(video_path)
                processor.process_video_pipeline(
                    output_dir=video_keyframes_dir,
                    skip_seconds=skip_seconds,
                    threshold=threshold
                )
            else:
                processor = video_processor.VideoProcessor(video_path)
                processor.process_video(
                    output_dir=video_keyframes_dir,
                    skip_seconds=skip_seconds
                )

            return self._list_keyframes(video_keyframes_dir)

        except Exception:
            # Drop the incomplete directory so a later run does not mistake
            # it for a valid cache entry.
            if os.path.exists(video_keyframes_dir):
                import shutil
                shutil.rmtree(video_keyframes_dir)
            raise

    async def _process_with_gemini(
        self,
        keyframe_files: List[str],
        video_theme: str,
        custom_prompt: str,
        vision_batch_size: int,
        progress_callback: Callable[[float, str], None]
    ) -> str:
        """Analyse the keyframes with Gemini and generate the script.

        Batches whose result contains an ``'error'`` key are logged and
        skipped. Every remaining batch becomes one frame entry carrying its
        timestamp range and the vision model's response.

        Raises:
            ValueError: If the Gemini API key or model name is not configured.
            Exception: If no batch produced a usable analysis result.
        """
        progress_callback(30, "正在初始化视觉分析器...")

        # Read the Gemini configuration
        vision_api_key = config.app.get("vision_gemini_api_key")
        vision_model = config.app.get("vision_gemini_model_name")

        if not vision_api_key or not vision_model:
            raise ValueError("未配置 Gemini API Key 或者模型")

        analyzer = gemini_analyzer.VisionAnalyzer(
            model_name=vision_model,
            api_key=vision_api_key,
        )

        progress_callback(40, "正在分析关键帧...")

        # Run the asynchronous analysis
        results = await analyzer.analyze_images(
            images=keyframe_files,
            prompt=config.app.get('vision_analysis_prompt'),
            batch_size=vision_batch_size
        )

        progress_callback(60, "正在整理分析结果...")

        # Single pass over the batches: report failed ones and build the
        # frame content list from the successful ones.
        frame_content_list = []
        prev_batch_files = None

        for result in results:
            if 'error' in result:
                logger.warning(f"批次 {result['batch_index']} 处理出现警告: {result['error']}")
                continue

            batch_files = self._get_batch_files(keyframe_files, result, vision_batch_size)
            _, _, timestamp_range = self._get_batch_timestamps(batch_files, prev_batch_files)

            frame_content_list.append({
                "timestamp": timestamp_range,
                "picture": result['response'],
                "narration": "",
                "OST": 2
            })
            prev_batch_files = batch_files

        if not frame_content_list:
            raise Exception("未能生成有效的帧分析结果")

        progress_callback(70, "正在生成脚本...")
        progress_callback(90, "正在生成文案...")

        # Read the text generation configuration
        text_provider = config.app.get('text_llm_provider', 'gemini').lower()
        text_api_key = config.app.get(f'text_{text_provider}_api_key')
        text_model = config.app.get(f'text_{text_provider}_model_name')

        processor = ScriptProcessor(
            model_name=text_model,
            api_key=text_api_key,
            prompt=custom_prompt,
            video_theme=video_theme
        )

        return processor.process_frames(frame_content_list)

    async def _process_with_narrato(
        self,
        keyframe_files: List[str],
        video_theme: str,
        custom_prompt: str,
        vision_batch_size: int,
        progress_callback: Callable[[float, str], None]
    ) -> str:
        """Analyse the keyframes through the NarratoAPI service.

        The keyframes are zipped and uploaded, then the task status endpoint
        is polled every 2 seconds (at most 60 attempts). HTTP calls run in a
        worker thread and waits use ``asyncio.sleep`` so the event loop is
        never blocked. The temporary zip file is always removed.

        Raises:
            ValueError: If the Narrato API key or URL is not configured.
            Exception: If zipping fails, the upload response carries no task
                id, the task fails, or polling times out.
        """
        # Temporary working directory
        temp_dir = utils.temp_dir("narrato")

        # Pack the keyframes
        progress_callback(30, "正在打包关键帧...")
        zip_path = os.path.join(temp_dir, f"keyframes_{int(time.time())}.zip")

        try:
            if not utils.create_zip(keyframe_files, zip_path):
                raise Exception("打包关键帧失败")

            # Read the API configuration
            api_url = config.app.get("narrato_api_url")
            api_key = config.app.get("narrato_api_key")

            if not api_key:
                raise ValueError("未配置 Narrato API Key")
            if not api_url:
                # Without this check the request would go to "None/video/analyze".
                raise ValueError("未配置 Narrato API URL")

            headers = {
                'X-API-Key': api_key,
                'accept': 'application/json'
            }

            api_params = {
                'batch_size': vision_batch_size,
                'use_ai': False,
                'start_offset': 0,
                'vision_model': config.app.get('narrato_vision_model', 'gemini-1.5-flash'),
                'vision_api_key': config.app.get('narrato_vision_key'),
                'llm_model': config.app.get('narrato_llm_model', 'qwen-plus'),
                'llm_api_key': config.app.get('narrato_llm_key'),
                'custom_prompt': custom_prompt
            }

            progress_callback(40, "正在上传文件...")
            with open(zip_path, 'rb') as f:
                files = {'file': (os.path.basename(zip_path), f, 'application/x-zip-compressed')}
                response = await asyncio.to_thread(
                    requests.post,
                    f"{api_url}/video/analyze",
                    headers=headers,
                    params=api_params,
                    files=files,
                    timeout=30
                )
                response.raise_for_status()

            task_data = response.json()
            task_id = task_data["data"].get('task_id')
            if not task_id:
                raise Exception(f"无效的API响应: {response.text}")

            progress_callback(50, "正在等待分析结果...")
            max_retries = 60
            poll_interval = 2  # seconds between status checks

            for _ in range(max_retries):
                try:
                    status_response = await asyncio.to_thread(
                        requests.get,
                        f"{api_url}/video/tasks/{task_id}",
                        headers=headers,
                        timeout=10
                    )
                    status_response.raise_for_status()
                    task_status = status_response.json()['data']
                except requests.RequestException as e:
                    # Transient HTTP problems count as one attempt and are retried.
                    logger.warning(f"获取任务状态失败,重试中: {str(e)}")
                    await asyncio.sleep(poll_interval)
                    continue

                if task_status['status'] == 'SUCCESS':
                    return task_status['result']['data']
                if task_status['status'] in ['FAILURE', 'RETRY']:
                    raise Exception(f"任务失败: {task_status.get('error')}")

                await asyncio.sleep(poll_interval)

            raise Exception("任务执行超时")

        finally:
            # Remove the temporary zip file (best effort)
            try:
                if os.path.exists(zip_path):
                    os.remove(zip_path)
            except Exception as e:
                logger.warning(f"清理临时文件失败: {str(e)}")

    def _get_batch_files(
        self,
        keyframe_files: List[str],
        result: Dict[str, Any],
        batch_size: int
    ) -> List[str]:
        """Return the keyframe files that belong to the batch of *result*.

        The batch covers indices ``batch_index * batch_size`` up to (but not
        including) the next batch start, clamped to the number of files.
        """
        batch_start = result['batch_index'] * batch_size
        batch_end = min(batch_start + batch_size, len(keyframe_files))
        return keyframe_files[batch_start:batch_end]

    @staticmethod
    def _format_timestamp(time_str: str) -> str:
        """Convert a time string to ``HH:MM:SS,mmm``.

        Accepts ``HH:MM:SS``, ``MM:SS`` or a plain seconds value, each with an
        optional ``,mmm`` millisecond suffix. Seconds and minutes of 60 or
        more are carried over. Strings shorter than four characters, and any
        string that fails to parse, yield ``"00:00:00,000"``.
        """
        try:
            if len(time_str) < 4:
                logger.warning(f"Invalid timestamp format: {time_str}")
                return "00:00:00,000"

            # Millisecond part
            if ',' in time_str:
                time_part, ms_part = time_str.split(',')
                ms = int(ms_part)
            else:
                time_part = time_str
                ms = 0

            # Hours / minutes / seconds
            parts = time_part.split(':')
            if len(parts) == 3:  # HH:MM:SS
                h, m, s = map(int, parts)
            elif len(parts) == 2:  # MM:SS
                h = 0
                m, s = map(int, parts)
            else:  # SS
                h = 0
                m = 0
                s = int(parts[0])

            # Carry overflowing seconds and minutes
            if s >= 60:
                m += s // 60
                s = s % 60
            if m >= 60:
                h += m // 60
                m = m % 60

            return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"

        except Exception as e:
            logger.error(f"时间戳格式转换错误 {time_str}: {str(e)}")
            return "00:00:00,000"

    def _get_batch_timestamps(
        self,
        batch_files: List[str],
        prev_batch_files: List[str] = None
    ) -> tuple[str, str, str]:
        """Return the timestamp range covered by a batch of keyframe files.

        The time is read from the third ``_``-separated field of each file
        name (with ``.jpg`` removed).
        NOTE(review): the exact file name format is produced by the video
        processors, which are not visible here — confirm it matches the
        formats accepted by ``_format_timestamp``.

        When the batch holds a single file and a previous batch exists, the
        range starts at the last file of the previous batch.

        Returns:
            ``(first_timestamp, last_timestamp, "first-last")``; all zeros
            when *batch_files* is empty.
        """
        if not batch_files:
            logger.warning("Empty batch files")
            return "00:00:00,000", "00:00:00,000", "00:00:00,000-00:00:00,000"

        if len(batch_files) == 1 and prev_batch_files and len(prev_batch_files) > 0:
            first_frame = os.path.basename(prev_batch_files[-1])
            last_frame = os.path.basename(batch_files[0])
        else:
            first_frame = os.path.basename(batch_files[0])
            last_frame = os.path.basename(batch_files[-1])

        first_time = first_frame.split('_')[2].replace('.jpg', '')
        last_time = last_frame.split('_')[2].replace('.jpg', '')

        first_timestamp = self._format_timestamp(first_time)
        last_timestamp = self._format_timestamp(last_time)
        timestamp_range = f"{first_timestamp}-{last_timestamp}"

        return first_timestamp, last_timestamp, timestamp_range
|
||||
@ -8,6 +8,8 @@ from faster_whisper import WhisperModel
|
||||
from timeit import default_timer as timer
|
||||
from loguru import logger
|
||||
import google.generativeai as genai
|
||||
from moviepy.editor import VideoFileClip
|
||||
import os
|
||||
|
||||
from app.config import config
|
||||
from app.utils import utils
|
||||
@ -362,29 +364,86 @@ def create_with_gemini(audio_file: str, subtitle_file: str = "", api_key: Option
|
||||
return None
|
||||
|
||||
|
||||
def extract_audio_and_create_subtitle(video_file: str, subtitle_file: str = "") -> Optional[str]:
    """Extract the audio track of a video and generate a subtitle file from it.

    The audio is written as a temporary 16-bit PCM WAV file next to the
    video, passed to ``create`` to produce the subtitles, and removed again
    whether or not subtitle generation succeeds.

    Args:
        video_file: Path of the video file.
        subtitle_file: Output subtitle path (optional). When empty,
            ``<video name>.srt`` in the video's directory is used.

    Returns:
        The subtitle file path, or ``None`` if any step failed (the error
        and its traceback are logged).
    """
    audio_file = ""
    try:
        # Derive the working paths from the video location
        video_dir = os.path.dirname(video_file)
        video_name = os.path.splitext(os.path.basename(video_file))[0]
        audio_file = os.path.join(video_dir, f"{video_name}_audio.wav")

        # Auto-generate the subtitle path when none was given
        if not subtitle_file:
            subtitle_file = os.path.join(video_dir, f"{video_name}.srt")

        logger.info(f"开始从视频提取音频: {video_file}")

        video = VideoFileClip(video_file)
        try:
            if video.audio is None:
                # Fail with a clear message instead of an AttributeError.
                raise ValueError(f"视频文件不包含音轨: {video_file}")

            # Extract the audio and save it as WAV
            logger.info(f"正在提取音频到: {audio_file}")
            video.audio.write_audiofile(audio_file, codec='pcm_s16le')
        finally:
            # Always release the clip, even if extraction failed.
            video.close()

        logger.info("音频提取完成,开始生成字幕")

        # Generate the subtitles with create()
        create(audio_file, subtitle_file)

        return subtitle_file

    except Exception as e:
        logger.error(f"处理视频文件时出错: {str(e)}")
        logger.error(traceback.format_exc())
        return None

    finally:
        # Remove the temporary audio file on success and on failure alike.
        try:
            if audio_file and os.path.exists(audio_file):
                os.remove(audio_file)
                logger.info("已清理临时音频文件")
        except OSError as e:
            logger.warning(f"清理临时音频文件失败: {str(e)}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
task_id = "test456"
|
||||
task_id = "123456"
|
||||
task_dir = utils.task_dir(task_id)
|
||||
subtitle_file = f"{task_dir}/subtitle.srt"
|
||||
subtitle_file = f"{task_dir}/subtitle_123456.srt"
|
||||
audio_file = f"{task_dir}/audio.wav"
|
||||
video_file = "/Users/apple/Desktop/home/NarratoAI/resource/videos/merged_video_1702.mp4"
|
||||
|
||||
subtitles = file_to_subtitles(subtitle_file)
|
||||
print(subtitles)
|
||||
extract_audio_and_create_subtitle(video_file, subtitle_file)
|
||||
|
||||
# script_file = f"{task_dir}/script.json"
|
||||
# with open(script_file, "r") as f:
|
||||
# script_content = f.read()
|
||||
# s = json.loads(script_content)
|
||||
# script = s.get("script")
|
||||
#
|
||||
# correct(subtitle_file, script)
|
||||
# subtitles = file_to_subtitles(subtitle_file)
|
||||
# print(subtitles)
|
||||
|
||||
subtitle_file = f"{task_dir}/subtitle111.srt"
|
||||
create(audio_file, subtitle_file)
|
||||
# # script_file = f"{task_dir}/script.json"
|
||||
# # with open(script_file, "r") as f:
|
||||
# # script_content = f.read()
|
||||
# # s = json.loads(script_content)
|
||||
# # script = s.get("script")
|
||||
# #
|
||||
# # correct(subtitle_file, script)
|
||||
|
||||
# # 使用Gemini模型处理音频
|
||||
# gemini_api_key = config.app.get("gemini_api_key") # 请替换为实际的API密钥
|
||||
# gemini_subtitle_file = create_with_gemini(audio_file, api_key=gemini_api_key)
|
||||
#
|
||||
# if gemini_subtitle_file:
|
||||
# print(f"Gemini生成的字幕文件: {gemini_subtitle_file}")
|
||||
# subtitle_file = f"{task_dir}/subtitle111.srt"
|
||||
# create(audio_file, subtitle_file)
|
||||
|
||||
# # # 使用Gemini模型处理音频
|
||||
# # gemini_api_key = config.app.get("gemini_api_key") # 请替换为实际的API密钥
|
||||
# # gemini_subtitle_file = create_with_gemini(audio_file, api_key=gemini_api_key)
|
||||
# #
|
||||
# # if gemini_subtitle_file:
|
||||
# # print(f"Gemini生成的字幕文件: {gemini_subtitle_file}")
|
||||
|
||||
@ -206,134 +206,14 @@ def generate_final_videos(
|
||||
return final_video_paths, combined_video_paths
|
||||
|
||||
|
||||
def start(task_id, params: VideoParams, stop_at: str = "video"):
    """Run the video generation pipeline for a task, optionally stopping early.

    Steps, in order: script, terms, audio, subtitle, materials, final videos.
    The task state is updated after each step. If a step yields no result
    the task is marked as failed and ``None`` is returned.

    Args:
        task_id: Identifier of the task whose state is updated.
        params: Video parameters. A string ``video_concat_mode`` is converted
            to ``VideoConcatMode`` in place.
        stop_at: Step after which to stop: "script", "terms", "audio",
            "subtitle" or "materials". Any other value runs the whole
            pipeline.

    Returns:
        A dict with the results of the step the pipeline stopped at (or all
        results for a full run), or ``None`` on failure.
    """

    def _fail() -> None:
        """Mark the task as failed."""
        sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)

    def _progress(value: int) -> None:
        """Report that the task is still processing at *value* percent."""
        sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=value)

    def _complete(**fields) -> None:
        """Mark the task as complete and attach the given result fields."""
        sm.state.update_task(
            task_id, state=const.TASK_STATE_COMPLETE, progress=100, **fields
        )

    logger.info(f"start task: {task_id}, stop_at: {stop_at}")
    _progress(5)

    if isinstance(params.video_concat_mode, str):
        params.video_concat_mode = VideoConcatMode(params.video_concat_mode)

    # 1. Generate script
    video_script = generate_script(task_id, params)
    if not video_script:
        _fail()
        return None

    _progress(10)

    if stop_at == "script":
        _complete(script=video_script)
        return {"script": video_script}

    # 2. Generate terms (only when the video source is not "local")
    video_terms = ""
    if params.video_source != "local":
        video_terms = generate_terms(task_id, params, video_script)
        if not video_terms:
            _fail()
            return None

    save_script_data(task_id, video_script, video_terms, params)

    if stop_at == "terms":
        _complete(terms=video_terms)
        return {"script": video_script, "terms": video_terms}

    _progress(20)

    # 3. Generate audio
    audio_file, audio_duration, sub_maker = generate_audio(task_id, params, video_script)
    if not audio_file:
        _fail()
        return None

    _progress(30)

    if stop_at == "audio":
        _complete(audio_file=audio_file)
        return {"audio_file": audio_file, "audio_duration": audio_duration}

    # 4. Generate subtitle
    subtitle_path = generate_subtitle(task_id, params, video_script, sub_maker, audio_file)

    if stop_at == "subtitle":
        _complete(subtitle_path=subtitle_path)
        return {"subtitle_path": subtitle_path}

    _progress(40)

    # 5. Get video materials
    downloaded_videos = get_video_materials(
        task_id, params, video_terms, audio_duration
    )
    if not downloaded_videos:
        _fail()
        return None

    if stop_at == "materials":
        _complete(materials=downloaded_videos)
        return {"materials": downloaded_videos}

    _progress(50)

    # 6. Generate final videos
    final_video_paths, combined_video_paths = generate_final_videos(
        task_id, params, downloaded_videos, audio_file, subtitle_path
    )

    if not final_video_paths:
        _fail()
        return None

    logger.success(
        f"task {task_id} finished, generated {len(final_video_paths)} videos."
    )

    kwargs = {
        "videos": final_video_paths,
        "combined_videos": combined_video_paths,
        "script": video_script,
        "terms": video_terms,
        "audio_file": audio_file,
        "audio_duration": audio_duration,
        "subtitle_path": subtitle_path,
        "materials": downloaded_videos,
    }
    _complete(**kwargs)
    return kwargs
|
||||
|
||||
|
||||
def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: list):
|
||||
"""
|
||||
后台任务(自动剪辑视频进行剪辑)
|
||||
|
||||
task_id: 任务ID
|
||||
params: 剪辑参数
|
||||
subclip_path_videos: 视频文件路径
|
||||
|
||||
"""
|
||||
def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: dict):
|
||||
"""后台任务(自动剪辑视频进行剪辑)"""
|
||||
logger.info(f"\n\n## 开始任务: {task_id}")
|
||||
|
||||
# 初始化 ImageMagick
|
||||
if not utils.init_imagemagick():
|
||||
logger.warning("ImageMagick 初始化失败,字幕可能无法正常显示")
|
||||
|
||||
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=5)
|
||||
|
||||
# tts 角色名称
|
||||
@ -341,8 +221,7 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: li
|
||||
|
||||
logger.info("\n\n## 1. 加载视频脚本")
|
||||
video_script_path = path.join(params.video_clip_json_path)
|
||||
# video_script_path = video_clip_json_path
|
||||
# 判断json文件是否存在
|
||||
|
||||
if path.exists(video_script_path):
|
||||
try:
|
||||
with open(video_script_path, "r", encoding="utf-8") as f:
|
||||
@ -355,10 +234,12 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: li
|
||||
logger.debug(f"解说完整脚本: \n{video_script}")
|
||||
logger.debug(f"解说 OST 列表: \n{video_ost}")
|
||||
logger.debug(f"解说时间戳列表: \n{time_list}")
|
||||
|
||||
# 获取视频总时长(单位 s)
|
||||
total_duration = list_script[-1]['new_timestamp']
|
||||
total_duration = int(total_duration.split("-")[1].split(":")[0]) * 60 + int(
|
||||
total_duration.split("-")[1].split(":")[1])
|
||||
last_timestamp = list_script[-1]['new_timestamp']
|
||||
end_time = last_timestamp.split("-")[1]
|
||||
total_duration = utils.time_to_seconds(end_time)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"无法读取视频json脚本,请检查配置是否正确。{e}")
|
||||
raise ValueError("无法读取视频json脚本,请检查配置是否正确")
|
||||
@ -366,32 +247,51 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: li
|
||||
logger.error(f"video_script_path: {video_script_path} \n\n", traceback.format_exc())
|
||||
raise ValueError("解说脚本不存在!请检查配置是否正确。")
|
||||
|
||||
logger.info("\n\n## 2. 生成音频列表")
|
||||
audio_files, sub_maker_list = voice.tts_multiple(
|
||||
task_id=task_id,
|
||||
list_script=list_script,
|
||||
voice_name=voice_name,
|
||||
voice_rate=params.voice_rate,
|
||||
voice_pitch=params.voice_pitch,
|
||||
force_regenerate=True
|
||||
logger.info("\n\n## 2. 根据OST设置生成音频列表")
|
||||
# 只为OST=0或2的片段生成TTS音频
|
||||
tts_segments = [
|
||||
segment for segment in list_script
|
||||
if segment['OST'] in [0, 2]
|
||||
]
|
||||
# logger.debug(f"tts_segments: {tts_segments}")
|
||||
if tts_segments:
|
||||
audio_files, sub_maker_list = voice.tts_multiple(
|
||||
task_id=task_id,
|
||||
list_script=tts_segments, # 只传入需要TTS的片段
|
||||
voice_name=voice_name,
|
||||
voice_rate=params.voice_rate,
|
||||
voice_pitch=params.voice_pitch,
|
||||
force_regenerate=True
|
||||
)
|
||||
if audio_files is None:
|
||||
sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
|
||||
logger.error("TTS转换音频失败, 可能是网络不可用! 如果您在中国, 请使用VPN.")
|
||||
return
|
||||
else:
|
||||
audio_files = []
|
||||
|
||||
logger.info(f"合并音频文件:\n{audio_files}")
|
||||
# 传入OST信息以便正确处理音频
|
||||
final_audio = audio_merger.merge_audio_files(
|
||||
task_id=task_id,
|
||||
audio_files=audio_files,
|
||||
total_duration=total_duration,
|
||||
list_script=list_script # 传入完整脚本以便处理OST
|
||||
)
|
||||
if audio_files is None:
|
||||
sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
|
||||
logger.error(
|
||||
"TTS转换音频失败, 可能是网络不可用! 如果您在中国, 请使用VPN.")
|
||||
return
|
||||
logger.info(f"合并音频:\n\n {audio_files}")
|
||||
audio_file = audio_merger.merge_audio_files(task_id, audio_files, total_duration, list_script)
|
||||
|
||||
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=30)
|
||||
|
||||
# 只为OST=0或2的片段生成字幕
|
||||
subtitle_path = ""
|
||||
if params.subtitle_enabled:
|
||||
subtitle_path = path.join(utils.task_dir(task_id), f"subtitle.srt")
|
||||
subtitle_provider = config.app.get("subtitle_provider", "").strip().lower()
|
||||
logger.info(f"\n\n## 3. 生成字幕、提供程序是: {subtitle_provider}")
|
||||
# 使用 faster-whisper-large-v2 模型生成字幕
|
||||
subtitle.create(audio_file=audio_file, subtitle_file=subtitle_path)
|
||||
|
||||
subtitle.create(
|
||||
audio_file=final_audio,
|
||||
subtitle_file=subtitle_path,
|
||||
)
|
||||
|
||||
subtitle_lines = subtitle.file_to_subtitles(subtitle_path)
|
||||
if not subtitle_lines:
|
||||
@ -402,7 +302,7 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: li
|
||||
|
||||
logger.info("\n\n## 4. 裁剪视频")
|
||||
subclip_videos = [x for x in subclip_path_videos.values()]
|
||||
logger.debug(f"\n\n## 裁剪后的视频文件列表: \n{subclip_videos}")
|
||||
# logger.debug(f"\n\n## 裁剪后的视频文件列表: \n{subclip_videos}")
|
||||
|
||||
if not subclip_videos:
|
||||
sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
|
||||
@ -434,14 +334,15 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: li
|
||||
|
||||
final_video_path = path.join(utils.task_dir(task_id), f"final-{index}.mp4")
|
||||
|
||||
logger.info(f"\n\n## 6. 最后一步: {index} => {final_video_path}")
|
||||
# 把所有东西合到在一起
|
||||
logger.info(f"\n\n## 6. 最后合成: {index} => {final_video_path}")
|
||||
# 传入OST信息以便正确处理音频和视频
|
||||
video.generate_video_v2(
|
||||
video_path=combined_video_path,
|
||||
audio_path=audio_file,
|
||||
audio_path=final_audio,
|
||||
subtitle_path=subtitle_path,
|
||||
output_file=final_video_path,
|
||||
params=params,
|
||||
list_script=list_script # 传入完整脚本以便处理OST
|
||||
)
|
||||
|
||||
_progress += 50 / 2
|
||||
|
||||
@ -18,6 +18,15 @@ from app.utils import utils
|
||||
|
||||
|
||||
def get_bgm_file(bgm_type: str = "random", bgm_file: str = ""):
|
||||
"""
|
||||
获取背景音乐文件路径
|
||||
Args:
|
||||
bgm_type: 背景音乐类型,可选值: random(随机), ""(无背景音乐)
|
||||
bgm_file: 指定的背景音乐文件路径
|
||||
|
||||
Returns:
|
||||
str: 背景音乐文件路径
|
||||
"""
|
||||
if not bgm_type:
|
||||
return ""
|
||||
|
||||
@ -48,21 +57,35 @@ def get_bgm_file(bgm_type: str = "random", bgm_file: str = ""):
|
||||
|
||||
|
||||
def combine_videos(
|
||||
combined_video_path: str,
|
||||
video_paths: List[str],
|
||||
audio_file: str,
|
||||
video_aspect: VideoAspect = VideoAspect.portrait,
|
||||
video_concat_mode: VideoConcatMode = VideoConcatMode.random,
|
||||
max_clip_duration: int = 5,
|
||||
threads: int = 2,
|
||||
combined_video_path: str,
|
||||
video_paths: List[str],
|
||||
audio_file: str,
|
||||
video_aspect: VideoAspect = VideoAspect.portrait,
|
||||
video_concat_mode: VideoConcatMode = VideoConcatMode.random,
|
||||
max_clip_duration: int = 5,
|
||||
threads: int = 2,
|
||||
) -> str:
|
||||
"""
|
||||
合并多个视频片段
|
||||
Args:
|
||||
combined_video_path: 合并后的视频保存路径
|
||||
video_paths: 待合并的视频路径列表
|
||||
audio_file: 音频文件路径
|
||||
video_aspect: 视频宽高比
|
||||
video_concat_mode: 视频拼接模式(随机/顺序)
|
||||
max_clip_duration: 每个片段的最大时长(秒)
|
||||
threads: 处理线程数
|
||||
|
||||
Returns:
|
||||
str: 合并后的视频路径
|
||||
"""
|
||||
audio_clip = AudioFileClip(audio_file)
|
||||
audio_duration = audio_clip.duration
|
||||
logger.info(f"max duration of audio: {audio_duration} seconds")
|
||||
# Required duration of each clip
|
||||
logger.info(f"音频时长: {audio_duration} 秒")
|
||||
# 每个片段的所需时长
|
||||
req_dur = audio_duration / len(video_paths)
|
||||
req_dur = max_clip_duration
|
||||
logger.info(f"each clip will be maximum {req_dur} seconds long")
|
||||
logger.info(f"每个片段最大时长: {req_dur} 秒")
|
||||
output_dir = os.path.dirname(combined_video_path)
|
||||
|
||||
aspect = VideoAspect(video_aspect)
|
||||
@ -81,22 +104,22 @@ def combine_videos(
|
||||
end_time = min(start_time + max_clip_duration, clip_duration)
|
||||
split_clip = clip.subclip(start_time, end_time)
|
||||
raw_clips.append(split_clip)
|
||||
# logger.info(f"splitting from {start_time:.2f} to {end_time:.2f}, clip duration {clip_duration:.2f}, split_clip duration {split_clip.duration:.2f}")
|
||||
# logger.info(f"从 {start_time:.2f} 到 {end_time:.2f}, 片段时长 {clip_duration:.2f}, 分割片段时长 {split_clip.duration:.2f}")
|
||||
start_time = end_time
|
||||
if video_concat_mode.value == VideoConcatMode.sequential.value:
|
||||
break
|
||||
|
||||
# random video_paths order
|
||||
# 随机视频片段顺序
|
||||
if video_concat_mode.value == VideoConcatMode.random.value:
|
||||
random.shuffle(raw_clips)
|
||||
|
||||
# Add downloaded clips over and over until the duration of the audio (max_duration) has been reached
|
||||
# 添加下载的片段,直到音频时长(max_duration)达到
|
||||
while video_duration < audio_duration:
|
||||
for clip in raw_clips:
|
||||
# Check if clip is longer than the remaining audio
|
||||
# 检查片段是否比剩余音频时长长
|
||||
if (audio_duration - video_duration) < clip.duration:
|
||||
clip = clip.subclip(0, (audio_duration - video_duration))
|
||||
# Only shorten clips if the calculated clip length (req_dur) is shorter than the actual clip to prevent still image
|
||||
# 仅当计算的片段时长(req_dur)小于实际片段时长时,缩短片段
|
||||
elif req_dur < clip.duration:
|
||||
clip = clip.subclip(0, req_dur)
|
||||
clip = clip.set_fps(30)
|
||||
@ -134,7 +157,7 @@ def combine_videos(
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f"resizing video to {video_width} x {video_height}, clip size: {clip_w} x {clip_h}"
|
||||
f"调整视频尺寸为 {video_width} x {video_height}, 片段尺寸: {clip_w} x {clip_h}"
|
||||
)
|
||||
|
||||
if clip.duration > max_clip_duration:
|
||||
@ -146,7 +169,7 @@ def combine_videos(
|
||||
video_clip = concatenate_videoclips(clips)
|
||||
video_clip = video_clip.set_fps(30)
|
||||
logger.info("writing")
|
||||
# https://github.com/harry0703/NarratoAI/issues/111#issuecomment-2032354030
|
||||
|
||||
video_clip.write_videofile(
|
||||
filename=combined_video_path,
|
||||
threads=threads,
|
||||
@ -161,6 +184,17 @@ def combine_videos(
|
||||
|
||||
|
||||
def wrap_text(text, max_width, font, fontsize=60):
|
||||
"""
|
||||
文本自动换行处理
|
||||
Args:
|
||||
text: 待处理的文本
|
||||
max_width: 最大宽度
|
||||
font: 字体文件路径
|
||||
fontsize: 字体大小
|
||||
|
||||
Returns:
|
||||
tuple: (换行后的文本, 文本高度)
|
||||
"""
|
||||
# 创建字体对象
|
||||
font = ImageFont.truetype(font, fontsize)
|
||||
|
||||
@ -220,6 +254,14 @@ def wrap_text(text, max_width, font, fontsize=60):
|
||||
|
||||
@contextmanager
|
||||
def manage_clip(clip):
|
||||
"""
|
||||
视频片段资源管理器
|
||||
Args:
|
||||
clip: 视频片段对象
|
||||
|
||||
Yields:
|
||||
VideoFileClip: 视频片段对象
|
||||
"""
|
||||
try:
|
||||
yield clip
|
||||
finally:
|
||||
@ -232,6 +274,7 @@ def generate_video_v2(
|
||||
audio_path: str,
|
||||
subtitle_path: str,
|
||||
output_file: str,
|
||||
list_script: list,
|
||||
params: Union[VideoParams, VideoClipParams],
|
||||
progress_callback=None,
|
||||
):
|
||||
@ -250,7 +293,7 @@ def generate_video_v2(
|
||||
"""
|
||||
total_steps = 4
|
||||
current_step = 0
|
||||
|
||||
|
||||
def update_progress(step_name):
|
||||
nonlocal current_step
|
||||
current_step += 1
|
||||
@ -260,7 +303,7 @@ def generate_video_v2(
|
||||
|
||||
try:
|
||||
validate_params(video_path, audio_path, output_file, params)
|
||||
|
||||
|
||||
with manage_clip(VideoFileClip(video_path)) as video_clip:
|
||||
aspect = VideoAspect(params.video_aspect)
|
||||
video_width, video_height = aspect.to_resolution()
|
||||
@ -304,7 +347,7 @@ def generate_video_v2(
|
||||
_clip = _clip.set_start(subtitle_item[0][0])
|
||||
_clip = _clip.set_end(subtitle_item[0][1])
|
||||
_clip = _clip.set_duration(duration)
|
||||
|
||||
|
||||
if params.subtitle_position == "bottom":
|
||||
_clip = _clip.set_position(("center", video_height * 0.95 - _clip.h))
|
||||
elif params.subtitle_position == "top":
|
||||
@ -335,6 +378,7 @@ def generate_video_v2(
|
||||
update_progress("字幕处理完成")
|
||||
|
||||
# 合并音频和导出
|
||||
logger.info("开始导出视频 (此步骤耗时较长请耐心等待)")
|
||||
video_clip = video_clip.set_audio(final_audio)
|
||||
video_clip.write_videofile(
|
||||
output_file,
|
||||
@ -344,7 +388,7 @@ def generate_video_v2(
|
||||
logger=None,
|
||||
fps=30,
|
||||
)
|
||||
|
||||
|
||||
except FileNotFoundError as e:
|
||||
logger.error(f"文件不存在: {str(e)}")
|
||||
raise
|
||||
@ -356,15 +400,25 @@ def generate_video_v2(
|
||||
|
||||
|
||||
def process_audio_tracks(original_audio, new_audio, params, video_duration):
|
||||
"""处理所有音轨"""
|
||||
"""
|
||||
处理所有音轨(原声、配音、背景音乐)
|
||||
Args:
|
||||
original_audio: 原始音频
|
||||
new_audio: 新音频
|
||||
params: 视频参数
|
||||
video_duration: 视频时长
|
||||
|
||||
Returns:
|
||||
CompositeAudioClip: 合成后的音频
|
||||
"""
|
||||
audio_tracks = []
|
||||
|
||||
|
||||
if original_audio is not None:
|
||||
audio_tracks.append(original_audio)
|
||||
|
||||
|
||||
new_audio = new_audio.volumex(params.voice_volume)
|
||||
audio_tracks.append(new_audio)
|
||||
|
||||
|
||||
# 处理背景音乐
|
||||
bgm_file = get_bgm_file(bgm_type=params.bgm_type, bgm_file=params.bgm_file)
|
||||
if bgm_file:
|
||||
@ -374,35 +428,54 @@ def process_audio_tracks(original_audio, new_audio, params, video_duration):
|
||||
audio_tracks.append(bgm_clip)
|
||||
except Exception as e:
|
||||
logger.error(f"添加背景音乐失败: {str(e)}")
|
||||
|
||||
|
||||
return CompositeAudioClip(audio_tracks) if audio_tracks else new_audio
|
||||
|
||||
|
||||
def process_subtitles(subtitle_path, video_clip, video_duration, create_text_clip):
    """
    Overlay the subtitles of a subtitle file onto a video clip.

    Args:
        subtitle_path: Path of the subtitle file. A falsy value or a path that
            does not exist disables subtitle rendering.
        video_clip: Base video clip the subtitle clips are composited over.
        video_duration: Upper bound (in the same unit as the clips' start/end)
            used to clamp every subtitle's time range.
        create_text_clip: Callback invoked as
            ``create_text_clip(subtitle_item=item)``; the returned clip must
            expose ``start``, ``end``, ``set_start`` and ``set_end``.

    Returns:
        ``video_clip`` itself when there is no usable subtitle file, otherwise
        a ``CompositeVideoClip`` made of the video plus the kept subtitle clips.
    """
    if not (subtitle_path and os.path.exists(subtitle_path)):
        return video_clip

    sub = SubtitlesClip(subtitles=subtitle_path, encoding="utf-8")
    text_clips = []

    for item in sub.subtitles:
        clip = create_text_clip(subtitle_item=item)

        # Clamp the subtitle to [0, video_duration]; drop it when it starts
        # after the video has ended.
        start_time = max(clip.start, 0)
        if start_time >= video_duration:
            continue

        end_time = min(clip.end, video_duration)
        # A subtitle whose clamped range is empty would yield a zero or
        # negative-length clip, so it is skipped as well.
        if end_time <= start_time:
            continue

        clip = clip.set_start(start_time).set_end(end_time)
        text_clips.append(clip)

    logger.info(f"处理了 {len(text_clips)} 段字幕")
    return CompositeVideoClip([video_clip, *text_clips])
|
||||
|
||||
|
||||
def preprocess_video(materials: List[MaterialInfo], clip_duration=4):
|
||||
"""
|
||||
预处理视频素材
|
||||
Args:
|
||||
materials: 素材信息列表
|
||||
clip_duration: 片段时长(秒)
|
||||
|
||||
Returns:
|
||||
List[MaterialInfo]: 处理后的素材信息列表
|
||||
"""
|
||||
for material in materials:
|
||||
if not material.url:
|
||||
continue
|
||||
@ -430,12 +503,12 @@ def preprocess_video(materials: List[MaterialInfo], clip_duration=4):
|
||||
# 使用resize方法来添加缩放效果。这里使用了lambda函数来使得缩放效果随时间变化。
|
||||
# 假设我们想要从原始大小逐渐放大到120%的大小。
|
||||
# t代表当前时间,clip.duration为视频总时长,这里是3秒。
|
||||
# 注意:1 表示100%的大小,所以1.2表示120%的大小
|
||||
# 注意:1 表示100%的大小,所以1.2表示120%的大小
|
||||
zoom_clip = clip.resize(
|
||||
lambda t: 1 + (clip_duration * 0.03) * (t / clip.duration)
|
||||
)
|
||||
|
||||
# 如果需要,可以创建一个包含缩放剪辑的复合视频剪辑
|
||||
# 如果需要,可以创建一个包含缩放剪辑的复合视频剪辑
|
||||
# (这在您想要在视频中添加其他元素时非常有用)
|
||||
final_clip = CompositeVideoClip([zoom_clip])
|
||||
|
||||
@ -472,7 +545,7 @@ def combine_clip_videos(combined_video_path: str,
|
||||
from app.utils.utils import calculate_total_duration
|
||||
audio_duration = calculate_total_duration(list_script)
|
||||
logger.info(f"音频的最大持续时间: {audio_duration} s")
|
||||
|
||||
|
||||
output_dir = os.path.dirname(combined_video_path)
|
||||
aspect = VideoAspect(video_aspect)
|
||||
video_width, video_height = aspect.to_resolution()
|
||||
@ -481,25 +554,25 @@ def combine_clip_videos(combined_video_path: str,
|
||||
for video_path, video_ost in zip(video_paths, video_ost_list):
|
||||
try:
|
||||
clip = VideoFileClip(video_path)
|
||||
|
||||
|
||||
if video_ost == 0: # 不保留原声
|
||||
clip = clip.without_audio()
|
||||
# video_ost 为 1 或 2 时都保留原声,不需要特殊处理
|
||||
|
||||
|
||||
clip = clip.set_fps(30)
|
||||
|
||||
# 处理视频尺寸
|
||||
clip_w, clip_h = clip.size
|
||||
if clip_w != video_width or clip_h != video_height:
|
||||
clip = resize_video_with_padding(
|
||||
clip,
|
||||
target_width=video_width,
|
||||
clip,
|
||||
target_width=video_width,
|
||||
target_height=video_height
|
||||
)
|
||||
logger.info(f"视频 {video_path} 已调整尺寸为 {video_width} x {video_height}")
|
||||
|
||||
clips.append(clip)
|
||||
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"处理视频 {video_path} 时出错: {str(e)}")
|
||||
continue
|
||||
@ -510,8 +583,8 @@ def combine_clip_videos(combined_video_path: str,
|
||||
try:
|
||||
video_clip = concatenate_videoclips(clips)
|
||||
video_clip = video_clip.set_fps(30)
|
||||
|
||||
logger.info("开始合并视频...")
|
||||
|
||||
logger.info("开始合并视频... (过程中出现 UserWarning: 不必理会)")
|
||||
video_clip.write_videofile(
|
||||
filename=combined_video_path,
|
||||
threads=threads,
|
||||
@ -521,7 +594,7 @@ def combine_clip_videos(combined_video_path: str,
|
||||
temp_audiofile=os.path.join(output_dir, "temp-audio.m4a")
|
||||
)
|
||||
finally:
|
||||
# 确保资源被正确释放
|
||||
# 确保资源被正确释放
|
||||
video_clip.close()
|
||||
for clip in clips:
|
||||
clip.close()
|
||||
@ -531,13 +604,22 @@ def combine_clip_videos(combined_video_path: str,
|
||||
|
||||
|
||||
def resize_video_with_padding(clip, target_width: int, target_height: int):
|
||||
"""辅助函数:调整视频尺寸并添加黑边"""
|
||||
"""
|
||||
调整视频尺寸并添加黑边
|
||||
Args:
|
||||
clip: 视频片段
|
||||
target_width: 目标宽度
|
||||
target_height: 目标高度
|
||||
|
||||
Returns:
|
||||
CompositeVideoClip: 调整尺寸后的视频
|
||||
"""
|
||||
clip_ratio = clip.w / clip.h
|
||||
target_ratio = target_width / target_height
|
||||
|
||||
if clip_ratio == target_ratio:
|
||||
return clip.resize((target_width, target_height))
|
||||
|
||||
|
||||
if clip_ratio > target_ratio:
|
||||
scale_factor = target_width / clip.w
|
||||
else:
|
||||
@ -548,10 +630,10 @@ def resize_video_with_padding(clip, target_width: int, target_height: int):
|
||||
clip_resized = clip.resize(newsize=(new_width, new_height))
|
||||
|
||||
background = ColorClip(
|
||||
size=(target_width, target_height),
|
||||
size=(target_width, target_height),
|
||||
color=(0, 0, 0)
|
||||
).set_duration(clip.duration)
|
||||
|
||||
|
||||
return CompositeVideoClip([
|
||||
background,
|
||||
clip_resized.set_position("center")
|
||||
@ -559,106 +641,100 @@ def resize_video_with_padding(clip, target_width: int, target_height: int):
|
||||
|
||||
|
||||
def validate_params(video_path, audio_path, output_file, params):
    """
    Validate the inputs of the video generation step.

    Args:
        video_path: Path of the source video file; must exist.
        audio_path: Path of the audio file; must exist.
        output_file: Path the result will be written to; its parent directory
            must exist. A bare file name refers to the current directory.
        params: Parameter object; must expose a ``video_aspect`` attribute.

    Raises:
        FileNotFoundError: If the video, the audio or the output directory
            does not exist.
        ValueError: If ``params`` has no ``video_aspect`` attribute.
    """
    if not os.path.exists(video_path):
        raise FileNotFoundError(f"视频文件不存在: {video_path}")

    if not os.path.exists(audio_path):
        raise FileNotFoundError(f"音频文件不存在: {audio_path}")

    # dirname() is "" for a bare file name; that means "current directory",
    # which must not be reported as a missing output directory.
    output_dir = os.path.dirname(output_file) or os.curdir
    if not os.path.exists(output_dir):
        raise FileNotFoundError(f"输出目录不存在: {output_dir}")

    if not hasattr(params, 'video_aspect'):
        raise ValueError("params 缺少必要参数 video_aspect")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# combined_video_path = "../../storage/tasks/12312312/com123.mp4"
|
||||
#
|
||||
# video_paths = ['../../storage/cache_videos/vid-00_00-00_03.mp4',
|
||||
# '../../storage/cache_videos/vid-00_03-00_07.mp4',
|
||||
# '../../storage/cache_videos/vid-00_12-00_17.mp4',
|
||||
# '../../storage/cache_videos/vid-00_26-00_31.mp4']
|
||||
# video_ost_list = [False, True, False, True]
|
||||
# list_script = [
|
||||
# {
|
||||
# "picture": "夜晚,一个小孩在树林里奔跑,后面有人拿着火把在追赶",
|
||||
# "timestamp": "00:00-00:03",
|
||||
# "narration": "夜黑风高的树林,一个小孩在拼命奔跑,后面的人穷追不舍!",
|
||||
# "OST": False,
|
||||
# "new_timestamp": "00:00-00:03"
|
||||
# },
|
||||
# {
|
||||
# "picture": "追赶的人命令抓住小孩",
|
||||
# "timestamp": "00:03-00:07",
|
||||
# "narration": "原声播放1",
|
||||
# "OST": True,
|
||||
# "new_timestamp": "00:03-00:07"
|
||||
# },
|
||||
# {
|
||||
# "picture": "小孩躲在草丛里,黑衣人用脚踢了踢他",
|
||||
# "timestamp": "00:12-00:17",
|
||||
# "narration": "小孩脱下外套,跑进树林, 一路奔跑,直到第二天清晨",
|
||||
# "OST": False,
|
||||
# "new_timestamp": "00:07-00:12"
|
||||
# },
|
||||
# {
|
||||
# "picture": "小孩跑到车前,慌慌张张地对女人说有人要杀他",
|
||||
# "timestamp": "00:26-00:31",
|
||||
# "narration": "原声播放2",
|
||||
# "OST": True,
|
||||
# "new_timestamp": "00:12-00:17"
|
||||
# }
|
||||
# ]
|
||||
combined_video_path = "../../storage/tasks/123/combined.mp4"
|
||||
|
||||
video_paths = ['../../storage/temp/clip_video/0b545e689a182a91af2163c7c0ca7ca3/vid-00-00-10_000-00-00-43_039.mp4',
|
||||
'../../storage/temp/clip_video/0b545e689a182a91af2163c7c0ca7ca3/vid-00-00-45_439-00-01-01_600.mp4',
|
||||
'../../storage/temp/clip_video/0b545e689a182a91af2163c7c0ca7ca3/vid-00-01-07_920-00-01-25_719.mp4',
|
||||
'../../storage/temp/clip_video/0b545e689a182a91af2163c7c0ca7ca3/vid-00-01-36_959-00-01-53_719.mp4']
|
||||
video_ost_list = [2, 2, 2, 2]
|
||||
list_script = [
|
||||
{
|
||||
"timestamp": "00:10-00:43",
|
||||
"picture": "好的,以下是视频画面的客观描述:\n\n视频显示一个男人在一个树木繁茂的地区,靠近一个泥土斜坡他穿着一件深色T恤、卡其色长裤和登山靴。他背着一个军绿色背包,里面似乎装有头和其他工具。\n\n第一个镜头显示该男子从远处走近斜坡,背对着镜头。下一个镜头特写显示了的背包,一个镐头从背包中伸出来。下一个镜头显示该男子用镐头敲打斜坡。下一个镜头是该男子脚上的特写镜头,他穿着登山靴,正站在泥土斜坡上。最后一个镜显示该男子在斜坡上,仔细地拨开树根和泥土。周围的环境是树木繁茂的,阳光透过树叶照射下来。土壤是浅棕色的,斜坡上有许多树根和植被。",
|
||||
"narration": "(接上文)好吧,今天我们的男主角,背着一个看似随时要发射军绿色背包,竟然化身“泥土探险家”,在斜坡上挥舞着镐头!他这是准备挖宝还是给树根做个“美容”?阳光洒下来,简直是自然界的聚光灯,仿佛在说:“快来看看,这位勇士要挑战泥土极限!”我只能默默想,如果树根能说话,它们一定会喊:“别打我,我还有家人!”这就是生活,总有些搞笑的瞬间等着我们去发现!",
|
||||
"OST": 2,
|
||||
"new_timestamp": "00:00:00,000-00:00:33,000"
|
||||
},
|
||||
{
|
||||
"timestamp": "00:45-01:01",
|
||||
"picture": "好的以下是视频画面的客观描述:\n\n视频显示了一个人在森林里挖掘。\n\n第一个镜头是地面特写,显示出松<EFBFBD><EFBFBD>的泥土、碎石和落叶。光线照在部分区域。\n\n第二个镜头中,一模糊不清的蹲一个树根旁挖掘,一个橄榄绿色的背包放在地上。树根缠绕着常春藤。\n\n第三个镜头显示该人在一个更开阔的区域挖掘,那里有一些树根,以及部分倒的树干。他起来像是在挖掘一个较大的坑。\n\n第四个镜头是特写镜头,显示该人用工具清理土坑的墙壁。\n\n第五个镜头是土坑内部的特写镜头,可以看到土质的纹理,有一些小树根和它植被的残留物。",
|
||||
"narration": "现在,这位勇敢的挖掘者就像个“现代版的土豆农夫”,在林里开辟新天地。的目标是什么?挖一个宝藏还块“树根披萨”?小心哦,别让树根追着你喊:“不要挖我,我也是有故事的!”",
|
||||
"OST": 2,
|
||||
"new_timestamp": "00:00:33,000-00:00:49,000"
|
||||
},
|
||||
{
|
||||
"timestamp": "01:07-01:25",
|
||||
"picture": "好,以下是视频画面的客观描述:\n\n画面1:特写镜头,显示出一丛带有水珠的深绿色灌木叶片。叶片呈椭圆形,边缘光滑。背景是树根和泥土。\n\n画面2:一个留着胡子的男人正在一个森林中土坑里挖掘。他穿着黑色T恤和卡其色裤子,跪在地,用具挖掘泥土。周围环绕着树木、树根和灌木。一个倒下的树干横跨土坑上方。\n\n画面3:同一个男人坐在他刚才挖的坑的边缘,看着前方。他的表情似乎略带沉思。背景与画面2相同。\n\n画面4:一个广角镜头显示出他挖出的坑。这是一个不规则形状的土坑,在树木繁茂的斜坡上。土壤呈深棕色,可见树根。\n\n画面5:同一个男人跪在地上,用一把小斧头砍一根木头。他穿着与前几个画面相同的衣服。地面上覆盖着落叶。周围是树木和灌木。",
|
||||
"narration": "“哎呀,这片灌木叶子滴水如雨,感觉像是大自然的洗发水广告!但我这位‘挖宝达人’似乎更适合拍个‘森林里的单身狗’真人秀。等会儿,我要给树根唱首歌,听说它们爱音乐!”",
|
||||
"OST": 2,
|
||||
"new_timestamp": "00:00:49,000-00:01:07,000"
|
||||
},
|
||||
{
|
||||
"timestamp": "01:36-01:53",
|
||||
"picture": "好的,以下是视频画面内容的客观描述:\n\n视频包含三个镜头:\n\n**镜头一:**个小型、浅水池塘,位于树林中。池塘的水看起来浑浊,呈绿褐色。池塘周围遍布泥土和落叶。多根树枝和树干横跨池塘,部分浸没在水中。周围的植被茂密主要是深色树木和灌木。\n\n**镜头二:**距拍摄树深处,阳光透过树叶洒落在植被上。镜头中可见粗大的树干、树枝和各种绿叶植物。部分树枝似乎被砍断,切口可见。\n\n**镜头三:**近距离特写镜头,聚焦在树枝和绿叶上。叶片呈圆形,颜色为鲜绿色,有些叶片上有缺损。树枝颜色较深,呈现深褐色。背景是模糊的树林。\n",
|
||||
"narration": "“好吧,看来我们的‘挖宝达人’终于找到了一‘宝藏’——一个色泽如同绿豆汤的池塘!我敢打赌,这里不仅是小鱼儿的游乐场更是树枝们的‘水疗中心’!下次来这里,我得带上浮潜装备!”",
|
||||
"OST": 2,
|
||||
"new_timestamp": "00:01:07,000-00:01:24,000"
|
||||
}
|
||||
]
|
||||
# 合并子视频
|
||||
# combine_clip_videos(combined_video_path=combined_video_path, video_paths=video_paths, video_ost_list=video_ost_list, list_script=list_script)
|
||||
|
||||
# cfg = VideoClipParams()
|
||||
# cfg.video_aspect = VideoAspect.portrait
|
||||
# cfg.font_name = "STHeitiMedium.ttc"
|
||||
# cfg.font_size = 60
|
||||
# cfg.stroke_color = "#000000"
|
||||
# cfg.stroke_width = 1.5
|
||||
# cfg.text_fore_color = "#FFFFFF"
|
||||
# cfg.text_background_color = "transparent"
|
||||
# cfg.bgm_type = "random"
|
||||
# cfg.bgm_file = ""
|
||||
# cfg.bgm_volume = 1.0
|
||||
# cfg.subtitle_enabled = True
|
||||
# cfg.subtitle_position = "bottom"
|
||||
# cfg.n_threads = 2
|
||||
# cfg.paragraph_number = 1
|
||||
#
|
||||
# cfg.voice_volume = 1.0
|
||||
cfg = VideoClipParams()
|
||||
cfg.video_aspect = VideoAspect.portrait
|
||||
cfg.font_name = "STHeitiMedium.ttc"
|
||||
cfg.font_size = 60
|
||||
cfg.stroke_color = "#000000"
|
||||
cfg.stroke_width = 1.5
|
||||
cfg.text_fore_color = "#FFFFFF"
|
||||
cfg.text_background_color = "transparent"
|
||||
cfg.bgm_type = "random"
|
||||
cfg.bgm_file = ""
|
||||
cfg.bgm_volume = 1.0
|
||||
cfg.subtitle_enabled = True
|
||||
cfg.subtitle_position = "bottom"
|
||||
cfg.n_threads = 2
|
||||
cfg.video_volume = 1
|
||||
|
||||
# generate_video(video_path=video_file,
|
||||
# audio_path=audio_file,
|
||||
# subtitle_path=subtitle_file,
|
||||
# output_file=output_file,
|
||||
# params=cfg
|
||||
# )
|
||||
#
|
||||
# video_path = "../../storage/tasks/7f5ae494-abce-43cf-8f4f-4be43320eafa/combined-1.mp4"
|
||||
#
|
||||
# audio_path = "../../storage/tasks/7f5ae494-abce-43cf-8f4f-4be43320eafa/audio_00-00-00-07.mp3"
|
||||
#
|
||||
# subtitle_path = "../../storage/tasks/7f5ae494-abce-43cf-8f4f-4be43320eafa\subtitle.srt"
|
||||
#
|
||||
# output_file = "../../storage/tasks/7f5ae494-abce-43cf-8f4f-4be43320eafa/final-123.mp4"
|
||||
#
|
||||
# generate_video_v2(video_path=video_path,
|
||||
# audio_path=audio_path,
|
||||
# subtitle_path=subtitle_path,
|
||||
# output_file=output_file,
|
||||
# params=cfg
|
||||
# )
|
||||
cfg.voice_volume = 1.0
|
||||
|
||||
# 合并视频
|
||||
video_list = [
|
||||
'./storage/cache_videos/vid-01_03-01_50.mp4',
|
||||
'./storage/cache_videos/vid-01_55-02_29.mp4',
|
||||
'./storage/cache_videos/vid-03_24-04_04.mp4',
|
||||
'./storage/cache_videos/vid-04_50-05_28.mp4'
|
||||
]
|
||||
video_path = "../../storage/tasks/123/combined.mp4"
|
||||
audio_path = "../../storage/tasks/123/final_audio.mp3"
|
||||
subtitle_path = "../../storage/tasks/123/subtitle.srt"
|
||||
output_file = "../../storage/tasks/123/final-123.mp4"
|
||||
|
||||
generate_video_v2(video_path=video_path,
|
||||
audio_path=audio_path,
|
||||
subtitle_path=subtitle_path,
|
||||
output_file=output_file,
|
||||
params=cfg,
|
||||
list_script=list_script,
|
||||
)
|
||||
|
||||
58
app/services/video_service.py
Normal file
58
app/services/video_service.py
Normal file
@ -0,0 +1,58 @@
|
||||
import os
|
||||
from uuid import uuid4
|
||||
from loguru import logger
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
from app.services import material
|
||||
from app.models.schema import VideoClipParams
|
||||
from app.utils import utils
|
||||
|
||||
|
||||
class VideoService:
    """Service entry point for cutting a source video into script segments."""

    @staticmethod
    async def crop_video(
        video_path: str,
        video_script: List[dict]
    ) -> Tuple[str, Dict[str, str]]:
        """
        Cut ``video_path`` into one clip per script entry.

        Each entry of ``video_script`` must carry a ``'timestamp'`` key. On
        success every entry whose timestamp has a produced clip receives a
        ``'path'`` key (the script is modified in place).

        Args:
            video_path: Path of the source video file.
            video_script: List of script entries (dicts with ``'timestamp'``).

        Returns:
            Tuple[str, Dict[str, str]]: ``(task_id, clips)`` where ``task_id``
            is a freshly generated UUID string and ``clips`` is the mapping
            returned by ``material.clip_videos`` (``{timestamp: video_path}``).

        Raises:
            ValueError: If ``material.clip_videos`` returns ``None``.
            Exception: Any other error is logged with its traceback and
                re-raised unchanged.
        """
        try:
            task_id = str(uuid4())

            # Collect the timestamps to cut, in script order.
            time_list = [scene['timestamp'] for scene in video_script]

            subclip_videos = material.clip_videos(
                task_id=task_id,
                timestamp_terms=time_list,
                origin_video=video_path
            )

            if subclip_videos is None:
                raise ValueError("裁剪视频失败")

            # Attach the produced clip path to each script entry; an entry
            # without a clip is logged but does not abort the whole task.
            for scene in video_script:
                try:
                    scene['path'] = subclip_videos[scene['timestamp']]
                except KeyError as err:
                    logger.error(f"更新视频路径失败: {err}")

            # Report the number of clips actually produced, which can be
            # smaller than the number of requested timestamps.
            logger.debug(f"裁剪视频成功,共生成 {len(subclip_videos)} 个视频片段")
            logger.debug(f"视频片段路径: {subclip_videos}")

            return task_id, subclip_videos

        except Exception:
            logger.exception("裁剪视频失败")
            raise
|
||||
@ -11,6 +11,7 @@ from edge_tts.submaker import mktimestamp
|
||||
from xml.sax.saxutils import unescape
|
||||
from edge_tts import submaker, SubMaker
|
||||
from moviepy.video.tools import subtitles
|
||||
import time
|
||||
|
||||
from app.config import config
|
||||
from app.utils import utils
|
||||
@ -989,6 +990,9 @@ Gender: Female
|
||||
|
||||
Name: zh-CN-XiaoxiaoMultilingualNeural-V2
|
||||
Gender: Female
|
||||
|
||||
Name: zh-CN-YunxiNeural-V2
|
||||
Gender: Male
|
||||
""".strip()
|
||||
voices = []
|
||||
name = ""
|
||||
@ -1034,8 +1038,8 @@ def is_azure_v2_voice(voice_name: str):
|
||||
def tts(
    text: str, voice_name: str, voice_rate: float, voice_pitch: float, voice_file: str
) -> "SubMaker | None":
    """
    Synthesize ``text`` into ``voice_file`` with the engine matching the voice.

    Voices recognised by ``is_azure_v2_voice`` are sent to ``azure_tts_v2``
    (which takes no rate/pitch arguments); every other voice goes through
    ``azure_tts_v1``.

    Args:
        text: Text to synthesize.
        voice_name: Name of the voice to use.
        voice_rate: Speaking rate, forwarded to ``azure_tts_v1`` only.
        voice_pitch: Voice pitch, forwarded to ``azure_tts_v1`` only.
        voice_file: Path of the audio file to write.

    Returns:
        Whatever the selected engine returns: a ``SubMaker`` on success or
        ``None`` on failure.
    """
    if is_azure_v2_voice(voice_name):
        return azure_tts_v2(text, voice_name, voice_file)
    return azure_tts_v1(text, voice_name, voice_rate, voice_pitch, voice_file)
|
||||
|
||||
|
||||
@ -1068,33 +1072,47 @@ def azure_tts_v1(
|
||||
pitch_str = convert_pitch_to_percent(voice_pitch)
|
||||
for i in range(3):
|
||||
try:
|
||||
logger.info(f"start, voice name: {voice_name}, try: {i + 1}")
|
||||
logger.info(f"第 {i+1} 次使用 edge_tts 生成音频")
|
||||
|
||||
async def _do() -> SubMaker:
|
||||
async def _do() -> tuple[SubMaker, bytes]:
|
||||
communicate = edge_tts.Communicate(text, voice_name, rate=rate_str, pitch=pitch_str, proxy=config.proxy.get("http"))
|
||||
sub_maker = edge_tts.SubMaker()
|
||||
with open(voice_file, "wb") as file:
|
||||
async for chunk in communicate.stream():
|
||||
if chunk["type"] == "audio":
|
||||
file.write(chunk["data"])
|
||||
elif chunk["type"] == "WordBoundary":
|
||||
sub_maker.create_sub(
|
||||
(chunk["offset"], chunk["duration"]), chunk["text"]
|
||||
)
|
||||
return sub_maker
|
||||
# 判断音频文件是否一件存在
|
||||
audio_data = bytes() # 用于存储音频数据
|
||||
|
||||
async for chunk in communicate.stream():
|
||||
if chunk["type"] == "audio":
|
||||
audio_data += chunk["data"]
|
||||
elif chunk["type"] == "WordBoundary":
|
||||
sub_maker.create_sub(
|
||||
(chunk["offset"], chunk["duration"]), chunk["text"]
|
||||
)
|
||||
return sub_maker, audio_data
|
||||
|
||||
# 判断音频文件是否已存在
|
||||
if os.path.exists(voice_file):
|
||||
logger.info(f"voice file exists, skip tts: {voice_file}")
|
||||
continue
|
||||
sub_maker = asyncio.run(_do())
|
||||
if not sub_maker or not sub_maker.subs:
|
||||
logger.warning(f"failed, sub_maker is None or sub_maker.subs is None")
|
||||
|
||||
# 获取音频数据和字幕信息
|
||||
sub_maker, audio_data = asyncio.run(_do())
|
||||
|
||||
# 验证数据是否有效
|
||||
if not sub_maker or not sub_maker.subs or not audio_data:
|
||||
logger.warning(f"failed, invalid data generated")
|
||||
if i < 2:
|
||||
time.sleep(1)
|
||||
continue
|
||||
|
||||
# 数据有效,写入文件
|
||||
with open(voice_file, "wb") as file:
|
||||
file.write(audio_data)
|
||||
|
||||
logger.info(f"completed, output file: {voice_file}")
|
||||
return sub_maker
|
||||
except Exception as e:
|
||||
logger.error(f"failed, error: {str(e)}")
|
||||
logger.error(f"生成音频文件时出错: {str(e)}")
|
||||
if i < 2:
|
||||
time.sleep(1)
|
||||
return None
|
||||
|
||||
|
||||
@ -1130,14 +1148,6 @@ def azure_tts_v2(text: str, voice_name: str, voice_file: str) -> [SubMaker, None
|
||||
sub_maker = SubMaker()
|
||||
|
||||
def speech_synthesizer_word_boundary_cb(evt: speechsdk.SessionEventArgs):
|
||||
# print('WordBoundary event:')
|
||||
# print('\tBoundaryType: {}'.format(evt.boundary_type))
|
||||
# print('\tAudioOffset: {}ms'.format((evt.audio_offset + 5000)))
|
||||
# print('\tDuration: {}'.format(evt.duration))
|
||||
# print('\tText: {}'.format(evt.text))
|
||||
# print('\tTextOffset: {}'.format(evt.text_offset))
|
||||
# print('\tWordLength: {}'.format(evt.word_length))
|
||||
|
||||
duration = _format_duration_to_offset(str(evt.duration))
|
||||
offset = _format_duration_to_offset(evt.audio_offset)
|
||||
sub_maker.subs.append(evt.text)
|
||||
@ -1183,9 +1193,13 @@ def azure_tts_v2(text: str, voice_name: str, voice_file: str) -> [SubMaker, None
|
||||
logger.error(
|
||||
f"azure v2 speech synthesis error: {cancellation_details.error_details}"
|
||||
)
|
||||
if i < 2: # 如果不是最后一次重试,则等待1秒
|
||||
time.sleep(1)
|
||||
logger.info(f"completed, output file: {voice_file}")
|
||||
except Exception as e:
|
||||
logger.error(f"failed, error: {str(e)}")
|
||||
if i < 2: # 如果不是最后一次重试,则等待1秒
|
||||
time.sleep(1)
|
||||
return None
|
||||
|
||||
|
||||
@ -1443,7 +1457,7 @@ def tts_multiple(task_id: str, list_script: list, voice_name: str, voice_rate: f
|
||||
|
||||
if sub_maker is None:
|
||||
logger.error(f"无法为时间戳 {timestamp} 生成音频; "
|
||||
f"如果您在中国,请使用VPN。或者手动选择 zh-CN-YunyangNeural 等角色;"
|
||||
f"如果您在中国,请使用VPN; "
|
||||
f"或者使用其他 tts 引擎")
|
||||
continue
|
||||
|
||||
@ -1460,17 +1474,12 @@ if __name__ == "__main__":
|
||||
voice_name = parse_voice_name(voice_name)
|
||||
print(voice_name)
|
||||
|
||||
with open("../../resource/scripts/test.json", 'r', encoding='utf-8') as f:
|
||||
with open("../../resource/scripts/2024-1203-205442.json", 'r', encoding='utf-8') as f:
|
||||
data = json.load(f)
|
||||
|
||||
audio_files, sub_maker_list = tts_multiple(task_id="12312312", list_script=data, voice_name=voice_name, voice_rate=1)
|
||||
audio_files, sub_maker_list = tts_multiple(task_id="12312312", list_script=data, voice_name=voice_name, voice_rate=1, voice_pitch=1)
|
||||
|
||||
full_text = " ".join([item['narration'] for item in data if not item['OST']])
|
||||
subtitle_file = os.path.join(utils.task_dir("12312312"), "subtitle_multiple.srt")
|
||||
create_subtitle_from_multiple(full_text, sub_maker_list, data, subtitle_file)
|
||||
print(f"生成的音频文件列表: {audio_files}")
|
||||
print(f"生成的字幕文件: {subtitle_file}")
|
||||
|
||||
# text = " ".join([item['narration'] for item in data])
|
||||
# sub_marks = tts(text=text, voice_name=voice_name, voice_rate=1, voice_file="../../storage/tasks/12312312/aaa.mp3")
|
||||
# create_subtitle(text=text, sub_maker=sub_marks, subtitle_file="../../storage/tasks/12312312/subtitle_123.srt")
|
||||
|
||||
146
app/services/youtube_service.py
Normal file
146
app/services/youtube_service.py
Normal file
@ -0,0 +1,146 @@
|
||||
import yt_dlp
|
||||
import os
|
||||
from typing import List, Dict, Optional, Tuple
|
||||
from loguru import logger
|
||||
from uuid import uuid4
|
||||
|
||||
from app.utils import utils
|
||||
from app.services import video as VideoService
|
||||
|
||||
|
||||
class YoutubeService:
    """Download YouTube videos at a requested resolution through yt-dlp."""

    def __init__(self):
        # Container formats accepted as ``output_format``.
        self.supported_formats = ['mp4', 'mkv', 'webm', 'flv', 'avi']

    @staticmethod
    def _normalize_resolution(value) -> Optional[str]:
        """
        Reduce a resolution label to its ``'<height>p'`` form.

        ``'1080p60'``, ``'1080P'`` and ``'1080'`` all become ``'1080p'``.
        Returns ``None`` for anything that does not start with a number
        (``None``, ``'N/A'``, free-text format notes).
        """
        if not isinstance(value, str):
            return None
        height = value.strip().lower().partition('p')[0].strip()
        return f"{height}p" if height.isdigit() else None

    def _get_video_formats(self, url: str) -> List[Dict]:
        """
        Return the formats yt-dlp reports for ``url``.

        Each entry is a dict with the keys ``format_id``, ``ext``,
        ``resolution`` (taken from yt-dlp's ``format_note``), ``filesize``,
        ``vcodec`` and ``acodec``; missing values are ``'N/A'``.

        Raises:
            Exception: Any yt-dlp error is logged and re-raised.
        """
        ydl_opts = {
            'quiet': True,
            'no_warnings': True
        }

        try:
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info = ydl.extract_info(url, download=False)
                formats = info.get('formats', [])

                format_list = []
                for f in formats:
                    format_info = {
                        'format_id': f.get('format_id', 'N/A'),
                        'ext': f.get('ext', 'N/A'),
                        # ``or`` also covers a key that is present but None/empty.
                        'resolution': f.get('format_note') or 'N/A',
                        'filesize': f.get('filesize', 'N/A'),
                        'vcodec': f.get('vcodec', 'N/A'),
                        'acodec': f.get('acodec', 'N/A')
                    }
                    format_list.append(format_info)

                return format_list
        except Exception as e:
            logger.error(f"获取视频格式失败: {str(e)}")
            raise

    def _validate_format(self, output_format: str) -> None:
        """
        Check that ``output_format`` is supported (case-insensitive).

        Raises:
            ValueError: If the format is not in ``self.supported_formats``.
        """
        if output_format.lower() not in self.supported_formats:
            raise ValueError(
                f"不支持的视频格式: {output_format}。"
                f"支持的格式: {', '.join(self.supported_formats)}"
            )

    async def download_video(
        self,
        url: str,
        resolution: str,
        output_format: str = 'mp4',
        rename: Optional[str] = None
    ) -> Tuple[str, str, str]:
        """
        Download the video at ``url`` in the requested resolution.

        Args:
            url: YouTube video URL.
            resolution: Target resolution ('2160p', '1440p', '1080p', '720p' ...).
                Suffixes such as the frame rate are ignored ('1080p60' is
                treated as '1080p').
            output_format: Container format of the result; must be one of
                ``self.supported_formats``.
            rename: Optional file name (without extension) for the result.
                Without it the file is named from the task id and the title.

        Returns:
            Tuple[str, str, str]: ``(task_id, output_path, filename)``.

        Raises:
            ValueError: If the format is unsupported or no video stream with
                the requested resolution exists.
            Exception: Any yt-dlp error is logged with its traceback and
                re-raised.
        """
        try:
            task_id = str(uuid4())
            self._validate_format(output_format)

            # Fall back to the raw input so an unparsable resolution still
            # ends in the "not found" error that lists the available ones.
            base_resolution = self._normalize_resolution(resolution) or resolution

            formats = self._get_video_formats(url)

            # First format that carries video and has the requested height.
            target_format = None
            for fmt in formats:
                if fmt['vcodec'] == 'none':
                    continue
                if self._normalize_resolution(fmt['resolution']) == base_resolution:
                    target_format = fmt
                    break

            if target_format is None:
                available_resolutions = set()
                for fmt in formats:
                    if fmt['vcodec'] == 'none':
                        continue
                    normalized = self._normalize_resolution(fmt['resolution'])
                    if normalized is not None:
                        available_resolutions.add(normalized)
                raise ValueError(
                    f"未找到 {base_resolution} 分辨率的视频。"
                    f"可用分辨率: {', '.join(sorted(available_resolutions))}"
                )

            output_dir = utils.video_dir()
            os.makedirs(output_dir, exist_ok=True)

            if rename:
                # Explicit name: the output template is the final path itself.
                filename = f"{rename}.{output_format}"
                output_template = os.path.join(output_dir, filename)
            else:
                # Otherwise let yt-dlp expand task id + title.
                output_template = os.path.join(output_dir, f'{task_id}_%(title)s.%(ext)s')

            ydl_opts = {
                'format': f"{target_format['format_id']}+bestaudio[ext=m4a]/best",
                'outtmpl': output_template,
                'merge_output_format': output_format.lower(),
                'postprocessors': [{
                    'key': 'FFmpegVideoConvertor',
                    'preferedformat': output_format.lower(),
                }]
            }

            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info = ydl.extract_info(url, download=True)
                if rename:
                    output_path = output_template
                else:
                    # yt-dlp sanitises the title when it expands the template,
                    # so ask it for the expanded name instead of rebuilding it
                    # from the raw title; only the extension is replaced by
                    # the converted container format.
                    expanded = ydl.prepare_filename(info)
                    output_path = f"{os.path.splitext(expanded)[0]}.{output_format.lower()}"
                filename = os.path.basename(output_path)

            logger.info(f"视频下载成功: {output_path}")
            return task_id, output_path, filename

        except Exception:
            logger.exception("下载视频失败")
            raise
|
||||
@ -1,21 +1,32 @@
|
||||
"""
|
||||
使用 moviepy 库剪辑指定时间戳视频
|
||||
使用 moviepy 库剪辑指定时间戳视频,支持时分秒毫秒精度
|
||||
"""
|
||||
|
||||
from moviepy.editor import VideoFileClip
|
||||
from datetime import datetime
|
||||
import os
|
||||
|
||||
|
||||
def time_str_to_seconds(time_str: str) -> float:
    """
    Convert an ``HH:MM:SS,mmm`` time string into seconds.

    The hour field is not limited to 23, so positions past the 24-hour mark
    are accepted. Minutes and seconds must be within 0-59.

    Args:
        time_str: Time string such as ``"00:01:23,456"``.

    Returns:
        The position in seconds as a float (``83.456`` for the example above).

    Raises:
        ValueError: If the string does not follow the ``HH:MM:SS,mmm`` format
            or a component is out of range.
    """
    try:
        # Split "HH:MM:SS" from the millisecond part.
        time_part, ms_part = time_str.split(',')
        hours, minutes, seconds = (int(part) for part in time_part.split(':'))
        milliseconds = int(ms_part)
        if hours < 0 or not 0 <= minutes < 60 or not 0 <= seconds < 60 or milliseconds < 0:
            raise ValueError(f"time component out of range: {time_str!r}")
    except ValueError as e:
        raise ValueError("时间格式错误,请使用 HH:MM:SS,mmm 格式,例如 00:01:23,456") from e

    return hours * 3600 + minutes * 60 + seconds + milliseconds / 1000
|
||||
|
||||
|
||||
def format_duration(seconds: float) -> str:
    """
    Format a duration in seconds as ``HH:MM:SS,mmm``.

    The value is rounded to the nearest millisecond before it is split, so
    binary floating-point noise cannot drop a millisecond (``1.001`` is
    rendered as ``00:00:01,001``, not ``00:00:01,000``).

    Args:
        seconds: Non-negative duration in seconds.

    Returns:
        The formatted time string, e.g. ``"01:02:03,456"``.
    """
    total_ms = round(seconds * 1000)
    total_seconds, milliseconds = divmod(total_ms, 1000)
    total_minutes, whole_seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)

    return f"{hours:02d}:{minutes:02d}:{whole_seconds:02d},{milliseconds:03d}"
|
||||
|
||||
|
||||
def cut_video(video_path: str, start_time: str, end_time: str, output_path: str) -> None:
    """
    Cut the ``[start_time, end_time]`` range out of a video file.

    All inputs are validated before the file system is touched, so an invalid
    time range never deletes an existing output file. The opened clips are
    closed on every exit path.

    Args:
        video_path: Path of the source video file.
        start_time: Start position (format: "HH:MM:SS,mmm").
        end_time: End position (format: "HH:MM:SS,mmm").
        output_path: Path of the file to write. An existing file is replaced;
            if it cannot be removed (``PermissionError``) a message is printed
            and the function returns without cutting.

    Raises:
        ValueError: If a time string is malformed, the range exceeds the
            video length, or the end is not after the start.
        Exception: Any other error is printed and re-raised.
    """
    source = None
    clip = None
    try:
        start_seconds = time_str_to_seconds(start_time)
        end_seconds = time_str_to_seconds(end_time)

        source = VideoFileClip(video_path)

        # Validate the requested range against the real video length.
        if start_seconds >= source.duration or end_seconds > source.duration:
            raise ValueError(f"剪辑时间超出视频长度!视频总长度为: {format_duration(source.duration)}")

        if start_seconds >= end_seconds:
            raise ValueError("结束时间必须大于开始时间!")

        # dirname() is "" for a bare file name (current directory); there is
        # nothing to create in that case.
        output_dir = os.path.dirname(output_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        # Remove a previous result only once the inputs are known to be valid.
        if os.path.exists(output_path):
            try:
                os.remove(output_path)
            except PermissionError:
                print(f"无法删除已存在的文件:{output_path},请确保文件未被其他程序占用")
                return

        clip_duration = end_seconds - start_seconds
        print(f"原视频总长度: {format_duration(source.duration)}")
        print(f"剪辑时长: {format_duration(clip_duration)}")
        print(f"剪辑区间: {start_time} -> {end_time}")

        clip = source.subclip(start_seconds, end_seconds)

        try:
            clip.write_videofile(
                output_path,
                codec='libx264',
                audio_codec='aac',
                temp_audiofile='temp-audio.m4a',
                remove_temp=True
            )
        except IOError as e:
            print(f"写入视频文件时发生错误:{str(e)}")
            raise

    except Exception as e:
        print(f"视频剪辑过程中发生错误:{str(e)}")
        raise
    finally:
        # Release the readers even when validation or writing failed.
        for handle in (clip, source):
            if handle is not None:
                handle.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke run with machine-specific sample paths; adjust them before
    # running. cut_video requires an explicit output path and
    # "HH:MM:SS,mmm" time strings.
    cut_video(
        video_path="/Users/apple/Desktop/NarratoAI/resource/videos/duanju_yuansp.mp4",
        start_time="00:00:00,789",
        end_time="00:02:00,123",
        output_path="/Users/apple/Desktop/NarratoAI/resource/videos/duanju_yuansp_cut3.mp4"
    )
|
||||
|
||||
105
app/test/test_qwen.py
Normal file
105
app/test/test_qwen.py
Normal file
@ -0,0 +1,105 @@
|
||||
import os
|
||||
import traceback
|
||||
import json
|
||||
from openai import OpenAI
|
||||
from pydantic import BaseModel
|
||||
from typing import List
|
||||
from app.utils import utils
|
||||
from app.services.subtitle import extract_audio_and_create_subtitle
|
||||
|
||||
|
||||
class Step(BaseModel):
    # One entry of the clip script. The field names mirror the keys of the
    # JSON format requested in the system prompt of this file's __main__ block.
    # Time range in the source video, e.g. "00:00:50,020-00:01:44,000".
    timestamp: str
    # Description of the picture shown in this segment.
    picture: str
    # Narration text for this segment.
    narration: str
    # Integer flag for original-soundtrack handling
    # NOTE(review): the meaning of each value is not defined in this file — confirm.
    OST: int
    # Time range of the segment on the edited timeline,
    # e.g. "00:00:00,000-00:00:54,020".
    new_timestamp: str
|
||||
|
||||
class MathReasoning(BaseModel):
    # Container model holding the list of Step entries.
    # NOTE(review): not referenced by the code visible in this file — confirm
    # whether it is still needed.
    result: List[Step]
|
||||
|
||||
|
||||
def chat_with_qwen(prompt: str, system_message: str, subtitle_path: str) -> str:
    """
    Send the contents of a subtitle file to the Qwen chat model and return the reply.

    The API key is read from the ``DASHSCOPE_API_KEY`` environment variable so
    that no credential is stored in the source tree.

    Args:
        prompt: User prompt; the subtitle text is appended directly after it.
        system_message: System prompt that sets the assistant's behaviour.
        subtitle_path: Path of the UTF-8 subtitle file to read.

    Returns:
        The text content of the first choice returned by the model.

    Raises:
        Exception: On any failure (missing key, unreadable file, API error).
            The original exception is attached as ``__cause__``.
    """
    try:
        # Never hard-code credentials; take the key from the environment.
        api_key = os.environ.get("DASHSCOPE_API_KEY")
        if not api_key:
            raise ValueError("未设置环境变量 DASHSCOPE_API_KEY")

        client = OpenAI(
            api_key=api_key,
            base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
        )

        # Read the subtitle file
        with open(subtitle_path, "r", encoding="utf-8") as file:
            subtitle_content = file.read()

        completion = client.chat.completions.create(
            model="qwen-turbo-2024-11-01",
            messages=[
                {'role': 'system', 'content': system_message},
                {'role': 'user', 'content': prompt + subtitle_content}
            ]
        )
        return completion.choices[0].message.content

    except Exception as e:
        error_message = f"调用千问API时发生错误:{str(e)}"
        print(error_message)
        print("请参考文档:https://help.aliyun.com/zh/model-studio/developer-reference/error-code")
        # Chain the cause so the original traceback is not lost.
        raise Exception(error_message) from e
|
||||
|
||||
|
||||
# 使用示例
|
||||
# Usage example
if __name__ == "__main__":
    try:
        video_path = utils.video_dir("duanju_yuansp.mp4")
        # # Check that the video exists
        # if not os.path.exists(video_path):
        #     print(f"视频文件不存在:{video_path}")
        #     exit(1)

        # Extract the subtitles
        subtitle_path = os.path.join(utils.video_dir(""), "duanju_yuan.srt")
        extract_audio_and_create_subtitle(video_file=video_path, subtitle_file=subtitle_path)

        # Analyse the subtitles.
        # The JSON example below must itself be well-formed (valid timestamps,
        # no trailing comma), because the model is told to copy its format.
        system_message = """
        你是一个视频srt字幕分析剪辑器, 输入视频的srt字幕, 分析其中的精彩且尽可能连续的片段并裁剪出来, 注意确保文字与时间戳的正确匹配。
        输出需严格按照如下 json 格式:
        [
            {
                "timestamp": "00:00:50,020-00:01:44,000",
                "picture": "画面1",
                "narration": "播放原声",
                "OST": 0,
                "new_timestamp": "00:00:00,000-00:00:54,020"
            },
            {
                "timestamp": "01:49-02:30",
                "picture": "画面2",
                "narration": "播放原声",
                "OST": 2,
                "new_timestamp": "00:54-01:35"
            }
        ]
        """
        prompt = "字幕如下:\n"
        response = chat_with_qwen(prompt, system_message, subtitle_path)
        print(response)

        # TODO: save the JSON; the timestamps in it would first need converting
        # from "HH:MM:SS,mmm-HH:MM:SS,mmm" to the "MM:SS-MM:SS" form.
        # response = json.loads(response)
        # for item in response:
        #     item["timestamp"] = item["timestamp"].replace(":", "-")
        # with open(os.path.join(utils.video_dir(""), "duanju_yuan.json"), "w", encoding="utf-8") as file:
        #     json.dump(response, file, ensure_ascii=False)

    except Exception:
        print(traceback.format_exc())
|
||||
@ -10,6 +10,7 @@ from google.api_core import exceptions
|
||||
import google.generativeai as genai
|
||||
import PIL.Image
|
||||
import traceback
|
||||
from app.utils import utils
|
||||
|
||||
|
||||
class VisionAnalyzer:
|
||||
@ -146,14 +147,34 @@ class VisionAnalyzer:
|
||||
response_text = result['response']
|
||||
image_paths = result['image_paths']
|
||||
|
||||
img_name_start = Path(image_paths[0]).stem.split('_')[-1]
|
||||
img_name_end = Path(image_paths[-1]).stem.split('_')[-1]
|
||||
txt_path = os.path.join(output_dir, f"frame_{img_name_start}_{img_name_end}.txt")
|
||||
# 从文件名中提取时间戳并转换为标准格式
|
||||
def format_timestamp(img_path):
    """
    Convert the time token at the end of an image file name to ``HH:MM:SS,mmm``.

    The token is the part of the file stem after the last underscore. A purely
    numeric token of 9 digits is read as ``HHMMSSmmm`` and one of 6 digits as
    ``HHMMSS`` (e.g. ``keyframe_000123_010534123.jpg``). Passing such a token to
    ``utils.time_to_seconds`` would misread it as a plain number of seconds.
    Any other token is handed to ``utils.time_to_seconds``.

    Returns the raw token unchanged if it cannot be converted.
    """
    # Extract the time token from the file name
    timestamp = Path(img_path).stem.split('_')[-1]
    try:
        if timestamp.isdigit() and len(timestamp) in (6, 9):
            # Compact HHMMSS[mmm] token: decode field by field.
            hh = int(timestamp[0:2])
            mm = int(timestamp[2:4])
            ss = int(timestamp[4:6])
            ms = int(timestamp[6:9]) if len(timestamp) == 9 else 0
            total_ms = ((hh * 60 + mm) * 60 + ss) * 1000 + ms
        else:
            # Delimited formats ("MM:SS,mmm" etc.) go through the shared parser.
            seconds = utils.time_to_seconds(timestamp.replace('_', ':'))
            # Round to whole milliseconds so float error cannot drop 1 ms.
            total_ms = int(round(seconds * 1000))

        # Integer arithmetic from here on: HH:MM:SS,mmm
        hours, rest = divmod(total_ms, 3_600_000)
        minutes, rest = divmod(rest, 60_000)
        whole_seconds, milliseconds = divmod(rest, 1000)

        return f"{hours:02d}:{minutes:02d}:{whole_seconds:02d},{milliseconds:03d}"
    except Exception as e:
        logger.error(f"时间戳格式转换错误: {timestamp}, {str(e)}")
        return timestamp
|
||||
|
||||
start_timestamp = format_timestamp(image_paths[0])
|
||||
end_timestamp = format_timestamp(image_paths[-1])
|
||||
|
||||
txt_path = os.path.join(output_dir, f"frame_{start_timestamp}_{end_timestamp}.txt")
|
||||
|
||||
# 保存结果到txt文件
|
||||
with open(txt_path, 'w', encoding='utf-8') as f:
|
||||
f.write(response_text.strip())
|
||||
print(f"已保存分析结果到: {txt_path}")
|
||||
logger.info(f"已保存分析结果到: {txt_path}")
|
||||
|
||||
def load_images(self, image_paths: List[str]) -> List[PIL.Image.Image]:
|
||||
"""
|
||||
265
app/utils/qwenvl_analyzer.py
Normal file
265
app/utils/qwenvl_analyzer.py
Normal file
@ -0,0 +1,265 @@
|
||||
import json
|
||||
from typing import List, Union, Dict
|
||||
import os
|
||||
from pathlib import Path
|
||||
from loguru import logger
|
||||
from tqdm import tqdm
|
||||
import asyncio
|
||||
from tenacity import retry, stop_after_attempt, RetryError, wait_exponential
|
||||
from openai import OpenAI
|
||||
import PIL.Image
|
||||
import base64
|
||||
import io
|
||||
import traceback
|
||||
|
||||
|
||||
class QwenAnalyzer:
    """Vision analyzer that sends image batches to a Qwen-VL model through an OpenAI-compatible API."""

    def __init__(self, model_name: str = "qwen-vl-max-latest", api_key: str = None, base_url: str = None):
        """
        Create the analyzer and its API client.

        Args:
            model_name: Model identifier sent with every request.
            api_key: API key for the endpoint; required.
            base_url: API base URL; when empty the DashScope compatible-mode URL is used.

        Raises:
            ValueError: If ``api_key`` is missing or empty.
        """
        if not api_key:
            raise ValueError("必须提供API密钥")

        self.model_name = model_name
        self.api_key = api_key
        self.base_url = base_url or "https://dashscope.aliyuncs.com/compatible-mode/v1"

        # Configure the API client
        self._configure_client()

    def _configure_client(self):
        """Build ``self.client`` from the stored key and base URL; log and re-raise on failure."""
        try:
            self.client = OpenAI(
                api_key=self.api_key,
                base_url=self.base_url
            )
        except Exception as e:
            logger.error(f"初始化OpenAI客户端失败: {str(e)}")
            raise

    def _image_to_base64(self, image: PIL.Image.Image) -> str:
        """
        Encode a PIL image as a base64 JPEG string.

        Images that are not in RGB mode are converted first, because the JPEG
        encoder rejects modes such as RGBA or P. Callers may pass PIL objects
        that never went through ``load_images``.
        """
        if image.mode != "RGB":
            image = image.convert("RGB")
        buffered = io.BytesIO()
        image.save(buffered, format="JPEG")
        return base64.b64encode(buffered.getvalue()).decode("utf-8")

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=4, max=10),
        reraise=True
    )
    async def _generate_content_with_retry(self, prompt: str, batch: List[PIL.Image.Image]):
        """
        Send one batch of images plus the prompt to the model and return the reply text.

        The call is retried by tenacity (3 attempts, exponential wait). With
        ``reraise=True`` the last underlying exception reaches the caller rather
        than a wrapper, so the real cause shows up in logs.
        """
        try:
            # Build the message content: all images first, then the text prompt
            content = []
            for img in batch:
                base64_image = self._image_to_base64(img)
                content.append({
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{base64_image}"
                    }
                })

            content.append({
                "type": "text",
                "text": prompt
            })

            # The client call is blocking; run it in a worker thread
            response = await asyncio.to_thread(
                self.client.chat.completions.create,
                model=self.model_name,
                messages=[{
                    "role": "user",
                    "content": content
                }]
            )

            return response.choices[0].message.content

        except Exception as e:
            logger.error(f"API调用错误: {str(e)}")
            # Re-raise the original error; tenacity decides whether to retry.
            raise

    async def analyze_images(self,
                             images: Union[List[str], List[PIL.Image.Image]],
                             prompt: str,
                             batch_size: int = 5) -> List[Dict]:
        """
        Analyze images in batches.

        Args:
            images: List of image paths or list of PIL image objects.
            prompt: Prompt sent along with every batch.
            batch_size: Number of images per request.

        Returns:
            One dict per batch. Successful batches carry ``response``; batches that
            failed three times carry ``error`` instead. ``image_paths`` is included
            when the input was a list of paths.

        Raises:
            Exception: Wraps any error raised outside the per-batch retry loop
                (empty input, no loadable image, ...).
        """
        try:
            # Check for empty input before looking at images[0]
            if not images:
                raise ValueError("图片列表为空")

            # When paths are given, load them and keep only the paths that
            # actually loaded, so path i always belongs to image i.
            original_paths = None
            if isinstance(images[0], str):
                logger.info("正在加载图片...")
                original_paths, images = self._load_images_with_paths(images)

            # Validate every image object
            valid_images = []
            valid_paths = []
            for i, img in enumerate(images):
                if not isinstance(img, PIL.Image.Image):
                    logger.error(f"无效的图片对象,索引 {i}: {type(img)}")
                    continue
                valid_images.append(img)
                if original_paths:
                    valid_paths.append(original_paths[i])

            if not valid_images:
                raise ValueError("没有有效的图片对象")

            images = valid_images
            results = []
            total_batches = (len(images) + batch_size - 1) // batch_size

            with tqdm(total=total_batches, desc="分析进度") as pbar:
                for i in range(0, len(images), batch_size):
                    batch = images[i:i + batch_size]
                    batch_paths = valid_paths[i:i + batch_size] if valid_paths else None
                    retry_count = 0

                    while retry_count < 3:
                        try:
                            # Small delay before every batch except the first
                            if i > 0:
                                await asyncio.sleep(2)

                            response = await self._generate_content_with_retry(prompt, batch)
                            result_dict = {
                                'batch_index': i // batch_size,
                                'images_processed': len(batch),
                                'response': response,
                                'model_used': self.model_name
                            }

                            # Attach the image paths when available
                            if batch_paths:
                                result_dict['image_paths'] = batch_paths

                            results.append(result_dict)
                            break

                        except Exception as e:
                            retry_count += 1
                            error_msg = f"批次 {i // batch_size} 处理出错: {str(e)}"
                            logger.error(error_msg)

                            if retry_count >= 3:
                                # Give up on this batch but keep a record of the failure
                                results.append({
                                    'batch_index': i // batch_size,
                                    'images_processed': len(batch),
                                    'error': error_msg,
                                    'model_used': self.model_name,
                                    'image_paths': batch_paths if batch_paths else []
                                })
                            else:
                                logger.info(f"批次 {i // batch_size} 处理失败,等待60秒后重试当前批次...")
                                await asyncio.sleep(60)

                    pbar.update(1)

            return results

        except Exception as e:
            error_msg = f"图片分析过程中发生错误: {str(e)}\n{traceback.format_exc()}"
            logger.error(error_msg)
            raise Exception(error_msg)

    def save_results_to_txt(self, results: List[Dict], output_dir: str):
        """
        Write each batch response to its own text file in ``output_dir``.

        Results without a ``response`` (failed batches recorded by
        ``analyze_images``) are skipped with a warning instead of raising.
        """
        # Make sure the output directory exists
        os.makedirs(output_dir, exist_ok=True)

        for result in results:
            response_text = result.get('response')
            if response_text is None:
                logger.warning(
                    f"批次 {result.get('batch_index')} 没有分析结果,跳过保存: {result.get('error', '')}"
                )
                continue

            # Name the file after the first/last image when paths are known
            if result.get('image_paths'):
                image_paths = result['image_paths']
                img_name_start = Path(image_paths[0]).stem.split('_')[-1]
                img_name_end = Path(image_paths[-1]).stem.split('_')[-1]
                file_name = f"frame_{img_name_start}_{img_name_end}.txt"
            else:
                # Otherwise fall back to the batch index
                file_name = f"batch_{result['batch_index']}.txt"

            txt_path = os.path.join(output_dir, file_name)

            # Save the result to the text file
            with open(txt_path, 'w', encoding='utf-8') as f:
                f.write(response_text.strip())
            logger.info(f"已保存分析结果到: {txt_path}")

    def _load_images_with_paths(self, image_paths: List[str]):
        """
        Load images and return ``(loaded_paths, images)`` as two index-aligned lists.

        Paths that are missing or fail to load are logged and left out of both
        lists.

        Raises:
            ValueError: If no image could be loaded.
        """
        loaded_paths = []
        images = []
        failed_images = []

        for img_path in image_paths:
            try:
                if not os.path.exists(img_path):
                    logger.error(f"图片文件不存在: {img_path}")
                    failed_images.append(img_path)
                    continue

                img = PIL.Image.open(img_path)
                # Force the pixel data to be read now
                img.load()
                # Normalise to RGB
                if img.mode != 'RGB':
                    img = img.convert('RGB')
                images.append(img)
                loaded_paths.append(img_path)

            except Exception as e:
                logger.error(f"无法加载图片 {img_path}: {str(e)}")
                failed_images.append(img_path)

        if failed_images:
            logger.warning(f"以下图片加载失败:\n{json.dumps(failed_images, indent=2, ensure_ascii=False)}")

        if not images:
            raise ValueError("没有成功加载任何图片")

        return loaded_paths, images

    def load_images(self, image_paths: List[str]) -> List[PIL.Image.Image]:
        """
        Load several images.

        Args:
            image_paths: List of image paths.

        Returns:
            The successfully loaded images as RGB PIL objects; paths that fail
            are logged and skipped.

        Raises:
            ValueError: If no image could be loaded.
        """
        return self._load_images_with_paths(image_paths)[1]
|
||||
@ -374,22 +374,65 @@ class ScriptProcessor:
|
||||
记住:要敢于用"温和的违反"制造笑点,但要把握好尺度,让观众在轻松愉快中感受到乐趣。"""
|
||||
|
||||
def calculate_duration_and_word_count(self, time_range: str) -> int:
    """
    Estimate how many characters of narration fit into a time range.

    Args:
        time_range: Range in the form "HH:MM:SS,mmm-HH:MM:SS,mmm",
            e.g. "00:00:50,100-00:01:21,500".

    Returns:
        int: Estimated character count, using one character per 0.4 seconds
        (10 seconds -> 25 characters) and clamped to the range 10-500.
        Returns 100 if the range cannot be processed.
    """
    try:
        start_str, end_str = time_range.split('-')

        def time_to_seconds(time_str: str) -> float:
            """
            Convert "HH:MM:SS,mmm" to seconds with millisecond precision,
            e.g. "00:00:50,100" -> 50.1. Returns 0.0 (with a warning) when
            the string does not match that format.
            """
            try:
                # Split off the millisecond part
                time_part, ms_part = time_str.split(',')
                hours, minutes, seconds = map(int, time_part.split(':'))
                milliseconds = int(ms_part)

                # Convert to seconds
                total_seconds = (hours * 3600) + (minutes * 60) + seconds + (milliseconds / 1000)
                return total_seconds

            except ValueError as e:
                logger.warning(f"时间格式解析错误: {time_str}, error: {e}")
                return 0.0

        # Start and end of the range in seconds
        start_seconds = time_to_seconds(start_str)
        end_seconds = time_to_seconds(end_str)

        # Duration in seconds
        duration = end_seconds - start_seconds

        # Rule of thumb: one character per 0.4 seconds
        word_count = int(duration / 0.4)

        # Keep the estimate within 10-500 characters
        word_count = max(10, min(word_count, 500))

        logger.debug(f"时间范围 {time_range} 的持续时间为 {duration:.3f}秒, 估算字数: {word_count}")
        return word_count

    except Exception:
        logger.warning(f"字数计算错误: {traceback.format_exc()}")
        return 100  # default when anything goes wrong
|
||||
|
||||
def process_frames(self, frame_content_list: List[Dict]) -> List[Dict]:
|
||||
for frame_content in frame_content_list:
|
||||
@ -406,22 +449,47 @@ class ScriptProcessor:
|
||||
def _save_results(self, frame_content_list: List[Dict]):
|
||||
"""保存处理结果,并添加新的时间戳"""
|
||||
try:
|
||||
# 转换秒数为 MM:SS 格式
|
||||
def seconds_to_time(seconds):
|
||||
minutes = seconds // 60
|
||||
remaining_seconds = seconds % 60
|
||||
return f"{minutes:02d}:{remaining_seconds:02d}"
|
||||
def format_timestamp(seconds: float) -> str:
|
||||
"""将秒数转换为 HH:MM:SS,mmm 格式"""
|
||||
hours = int(seconds // 3600)
|
||||
minutes = int((seconds % 3600) // 60)
|
||||
seconds_remainder = seconds % 60
|
||||
whole_seconds = int(seconds_remainder)
|
||||
milliseconds = int((seconds_remainder - whole_seconds) * 1000)
|
||||
|
||||
return f"{hours:02d}:{minutes:02d}:{whole_seconds:02d},{milliseconds:03d}"
|
||||
|
||||
# 计算新的时间戳
|
||||
current_time = 0 # 当前时间点(秒)
|
||||
current_time = 0.0 # 当前时间点(秒,包含毫秒)
|
||||
|
||||
for frame in frame_content_list:
|
||||
# 获取原始时间戳的持续时间
|
||||
start_str, end_str = frame['timestamp'].split('-')
|
||||
|
||||
def time_to_seconds(time_str):
|
||||
minutes, seconds = map(int, time_str.split(':'))
|
||||
return minutes * 60 + seconds
|
||||
def time_to_seconds(time_str: str) -> float:
|
||||
"""将时间字符串转换为秒数(包含毫秒)"""
|
||||
try:
|
||||
if ',' in time_str:
|
||||
time_part, ms_part = time_str.split(',')
|
||||
ms = float(ms_part) / 1000
|
||||
else:
|
||||
time_part = time_str
|
||||
ms = 0
|
||||
|
||||
parts = time_part.split(':')
|
||||
if len(parts) == 3: # HH:MM:SS
|
||||
h, m, s = map(float, parts)
|
||||
seconds = h * 3600 + m * 60 + s
|
||||
elif len(parts) == 2: # MM:SS
|
||||
m, s = map(float, parts)
|
||||
seconds = m * 60 + s
|
||||
else: # SS
|
||||
seconds = float(parts[0])
|
||||
|
||||
return seconds + ms
|
||||
except Exception as e:
|
||||
logger.error(f"时间格式转换错误 {time_str}: {str(e)}")
|
||||
return 0.0
|
||||
|
||||
# 计算当前片段的持续时间
|
||||
start_seconds = time_to_seconds(start_str)
|
||||
@ -429,8 +497,8 @@ class ScriptProcessor:
|
||||
duration = end_seconds - start_seconds
|
||||
|
||||
# 设置新的时间戳
|
||||
new_start = seconds_to_time(current_time)
|
||||
new_end = seconds_to_time(current_time + duration)
|
||||
new_start = format_timestamp(current_time)
|
||||
new_end = format_timestamp(current_time + duration)
|
||||
frame['new_timestamp'] = f"{new_start}-{new_end}"
|
||||
|
||||
# 更新当前时间点
|
||||
@ -443,7 +511,7 @@ class ScriptProcessor:
|
||||
with open(file_name, 'w', encoding='utf-8') as file:
|
||||
json.dump(frame_content_list, file, ensure_ascii=False, indent=4)
|
||||
|
||||
logger.info(f"保存脚本成功,总时长: {seconds_to_time(current_time)}")
|
||||
logger.info(f"保存脚本成功,总时长: {format_timestamp(current_time)}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"保存结果时发生错误: {str(e)}\n{traceback.format_exc()}")
|
||||
|
||||
@ -40,7 +40,7 @@ def to_json(obj):
|
||||
# 如果对象是二进制数据,转换为base64编码的字符串
|
||||
elif isinstance(o, bytes):
|
||||
return "*** binary data ***"
|
||||
# 如果对象是字典,递归处理每个键值对
|
||||
# 如果对象是字典,递归处理每个键值对
|
||||
elif isinstance(o, dict):
|
||||
return {k: serialize(v) for k, v in o.items()}
|
||||
# 如果对象是列表或元组,递归处理每个元素
|
||||
@ -56,7 +56,7 @@ def to_json(obj):
|
||||
# 使用serialize函数处理输入对象
|
||||
serialized_obj = serialize(obj)
|
||||
|
||||
# 序列化处理后的对象为JSON字符串
|
||||
# 序列化处理后的对象为JSON字符串
|
||||
return json.dumps(serialized_obj, ensure_ascii=False, indent=4)
|
||||
except Exception as e:
|
||||
return None
|
||||
@ -126,6 +126,15 @@ def public_dir(sub_dir: str = ""):
|
||||
return d
|
||||
|
||||
|
||||
def srt_dir(sub_dir: str = ""):
    """
    Return the "srt" resource directory, or ``sub_dir`` inside it, creating it if needed.

    Args:
        sub_dir: Optional sub-directory name joined onto the srt directory.

    Returns:
        The directory path, which exists when the function returns.
    """
    d = resource_dir("srt")
    if sub_dir:
        d = os.path.join(d, sub_dir)
    # exist_ok avoids the race between an existence check and the creation
    os.makedirs(d, exist_ok=True)
    return d
|
||||
|
||||
|
||||
def run_in_background(func, *args, **kwargs):
|
||||
def run():
|
||||
try:
|
||||
@ -302,15 +311,49 @@ def get_current_country():
|
||||
|
||||
|
||||
def time_to_seconds(time_str: str) -> float:
    """
    Convert a time string to seconds. Supported formats:
    - "HH:MM:SS,mmm" -> hours:minutes:seconds,milliseconds
    - "MM:SS,mmm"    -> minutes:seconds,milliseconds
    - "SS,mmm"       -> seconds,milliseconds
    - "SS-mmm"       -> seconds-milliseconds
    The millisecond part is optional in every format.

    Args:
        time_str: The time string.

    Returns:
        float: Seconds including the millisecond fraction, or 0.0 (with an
        error log) when the input cannot be parsed.
    """
    try:
        # Millisecond part separated by '-'
        if '-' in time_str:
            time_part, ms_part = time_str.split('-')
            ms = float(ms_part) / 1000
        # Millisecond part separated by ','
        elif ',' in time_str:
            time_part, ms_part = time_str.split(',')
            ms = float(ms_part) / 1000
        else:
            time_part = time_str
            ms = 0

        # Split the clock part
        parts = time_part.split(':')

        if len(parts) == 3:  # HH:MM:SS
            h, m, s = map(float, parts)
            seconds = h * 3600 + m * 60 + s
        elif len(parts) == 2:  # MM:SS
            m, s = map(float, parts)
            seconds = m * 60 + s
        else:  # SS
            seconds = float(parts[0])

        return seconds + ms

    except (ValueError, IndexError, TypeError, AttributeError) as e:
        # TypeError/AttributeError cover non-string input such as None
        logger.error(f"时间格式转换错误 {time_str}: {str(e)}")
        return 0.0
|
||||
|
||||
|
||||
def seconds_to_time(seconds: float) -> str:
|
||||
@ -320,15 +363,25 @@ def seconds_to_time(seconds: float) -> str:
|
||||
|
||||
|
||||
def calculate_total_duration(scenes):
|
||||
"""
|
||||
计算场景列表的总时长
|
||||
|
||||
Args:
|
||||
scenes: 场景列表,每个场景包含 timestamp 字段,格式如 "00:00:28,350-00:00:41,000"
|
||||
|
||||
Returns:
|
||||
float: 总时长(秒)
|
||||
"""
|
||||
total_seconds = 0
|
||||
|
||||
for scene in scenes:
|
||||
start, end = scene['timestamp'].split('-')
|
||||
start_time = datetime.strptime(start, '%M:%S')
|
||||
end_time = datetime.strptime(end, '%M:%S')
|
||||
# 使用 time_to_seconds 函数处理更精确的时间格式
|
||||
start_seconds = time_to_seconds(start)
|
||||
end_seconds = time_to_seconds(end)
|
||||
|
||||
duration = end_time - start_time
|
||||
total_seconds += duration.total_seconds()
|
||||
duration = end_seconds - start_seconds
|
||||
total_seconds += duration
|
||||
|
||||
return total_seconds
|
||||
|
||||
@ -451,7 +504,7 @@ def clear_keyframes_cache(video_path: str = None):
|
||||
return
|
||||
|
||||
if video_path:
|
||||
# 清理指定视频的缓存
|
||||
# 清理指定视频的缓存
|
||||
video_hash = md5(video_path + str(os.path.getmtime(video_path)))
|
||||
video_keyframes_dir = os.path.join(keyframes_dir, video_hash)
|
||||
if os.path.exists(video_keyframes_dir):
|
||||
@ -520,3 +573,21 @@ def download_font(url: str, font_path: str):
|
||||
except Exception as e:
|
||||
logger.error(f"下载字体文件失败: {e}")
|
||||
raise
|
||||
|
||||
def init_imagemagick():
    """
    Check that the ``magick`` command is usable and point IMAGEMAGICK_BINARY at it.

    Returns:
        bool: True when ``magick -version`` exits with code 0 and the
        environment variable has been set; False otherwise (the reason is logged).
    """
    import subprocess

    try:
        # Probe the ImageMagick binary; the timeout keeps a hung binary
        # from blocking start-up.
        result = subprocess.run(
            ['magick', '-version'],
            capture_output=True,
            text=True,
            timeout=10,
        )
    except FileNotFoundError:
        # No "magick" executable on PATH
        logger.error("ImageMagick 未安装或配置不正确")
        return False
    except Exception as e:
        logger.error(f"初始化 ImageMagick 失败: {str(e)}")
        return False

    if result.returncode != 0:
        logger.error("ImageMagick 未安装或配置不正确")
        return False

    # Set the IMAGEMAGICK_BINARY environment variable
    os.environ['IMAGEMAGICK_BINARY'] = 'magick'
    return True
|
||||
|
||||
@ -51,21 +51,34 @@ class VideoProcessor:
|
||||
def detect_shot_boundaries(self, frames: List[np.ndarray], threshold: int = 30) -> List[int]:
|
||||
"""
|
||||
使用帧差法检测镜头边界
|
||||
|
||||
|
||||
Args:
|
||||
frames: 视频帧列表
|
||||
threshold: 差异阈值
|
||||
|
||||
threshold: 差异阈值,默认值调低为30
|
||||
|
||||
Returns:
|
||||
List[int]: 镜头边界帧的索引列表
|
||||
"""
|
||||
shot_boundaries = []
|
||||
if len(frames) < 2: # 添加帧数检查
|
||||
logger.warning("视频帧数过少,无法检测场景边界")
|
||||
return [len(frames) - 1] # 返回最后一帧作为边界
|
||||
|
||||
for i in range(1, len(frames)):
|
||||
prev_frame = cv2.cvtColor(frames[i - 1], cv2.COLOR_BGR2GRAY)
|
||||
curr_frame = cv2.cvtColor(frames[i], cv2.COLOR_BGR2GRAY)
|
||||
diff = np.mean(np.abs(curr_frame.astype(int) - prev_frame.astype(int)))
|
||||
|
||||
# 计算帧差
|
||||
diff = np.mean(np.abs(curr_frame.astype(float) - prev_frame.astype(float)))
|
||||
|
||||
if diff > threshold:
|
||||
shot_boundaries.append(i)
|
||||
|
||||
# 如果没有检测到任何边界,至少返回最后一帧
|
||||
if not shot_boundaries:
|
||||
logger.warning("未检测到场景边界,将视频作为单个场景处理")
|
||||
shot_boundaries.append(len(frames) - 1)
|
||||
|
||||
return shot_boundaries
|
||||
|
||||
def extract_keyframes(self, frames: List[np.ndarray], shot_boundaries: List[int]) -> Tuple[
|
||||
@ -113,12 +126,7 @@ class VideoProcessor:
|
||||
output_dir: str, desc: str = "保存关键帧") -> None:
|
||||
"""
|
||||
保存关键帧到指定目录,文件名格式为:keyframe_帧序号_时间戳.jpg
|
||||
|
||||
Args:
|
||||
keyframes: 关键帧列表
|
||||
keyframe_indices: 关键帧索引列表
|
||||
output_dir: 输出目录
|
||||
desc: 进度条描述
|
||||
时间戳精确到毫秒,格式为:HHMMSSmmm
|
||||
"""
|
||||
if not os.path.exists(output_dir):
|
||||
os.makedirs(output_dir)
|
||||
@ -126,11 +134,13 @@ class VideoProcessor:
|
||||
for keyframe, frame_idx in tqdm(zip(keyframes, keyframe_indices),
|
||||
total=len(keyframes),
|
||||
desc=desc):
|
||||
# 计算精确到毫秒的时间戳
|
||||
timestamp = frame_idx / self.fps
|
||||
hours = int(timestamp // 3600)
|
||||
minutes = int((timestamp % 3600) // 60)
|
||||
seconds = int(timestamp % 60)
|
||||
time_str = f"{hours:02d}{minutes:02d}{seconds:02d}"
|
||||
milliseconds = int((timestamp % 1) * 1000) # 计算毫秒部分
|
||||
time_str = f"{hours:02d}{minutes:02d}{seconds:02d}{milliseconds:03d}"
|
||||
|
||||
output_path = os.path.join(output_dir,
|
||||
f'keyframe_{frame_idx:06d}_{time_str}.jpg')
|
||||
@ -138,11 +148,7 @@ class VideoProcessor:
|
||||
|
||||
def extract_frames_by_numbers(self, frame_numbers: List[int], output_folder: str) -> None:
|
||||
"""
|
||||
根据指定的帧号提取帧,如果多个帧在同一秒内,只保留一个
|
||||
|
||||
Args:
|
||||
frame_numbers: 要提取的帧号列表
|
||||
output_folder: 输出文件夹路径
|
||||
根据指定的帧号提取帧,如果多个帧在同一毫秒内,只保留一个
|
||||
"""
|
||||
if not frame_numbers:
|
||||
raise ValueError("未提供帧号列表")
|
||||
@ -153,29 +159,31 @@ class VideoProcessor:
|
||||
if not os.path.exists(output_folder):
|
||||
os.makedirs(output_folder)
|
||||
|
||||
# 用于记录已处理的时间戳(秒)
|
||||
processed_seconds = set()
|
||||
# 用于记录已处理的时间戳(毫秒)
|
||||
processed_timestamps = set()
|
||||
|
||||
for frame_number in tqdm(frame_numbers, desc="提取高清帧"):
|
||||
# 计算时间戳(秒)
|
||||
timestamp_seconds = int(frame_number / self.fps)
|
||||
# 计算精确到毫秒的时间戳
|
||||
timestamp = frame_number / self.fps
|
||||
timestamp_ms = int(timestamp * 1000) # 转换为毫秒
|
||||
|
||||
# 如果这一秒已经处理过,跳过
|
||||
if timestamp_seconds in processed_seconds:
|
||||
# 如果这一毫秒已经处理过,跳过
|
||||
if timestamp_ms in processed_timestamps:
|
||||
continue
|
||||
|
||||
self.cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
|
||||
ret, frame = self.cap.read()
|
||||
|
||||
if ret:
|
||||
# 记录这一秒已经处理
|
||||
processed_seconds.add(timestamp_seconds)
|
||||
# 记录这一毫秒已经处理
|
||||
processed_timestamps.add(timestamp_ms)
|
||||
|
||||
# 计算时间戳字符串
|
||||
hours = int(timestamp_seconds // 3600)
|
||||
minutes = int((timestamp_seconds % 3600) // 60)
|
||||
seconds = int(timestamp_seconds % 60)
|
||||
time_str = f"{hours:02d}{minutes:02d}{seconds:02d}"
|
||||
hours = int(timestamp // 3600)
|
||||
minutes = int((timestamp % 3600) // 60)
|
||||
seconds = int(timestamp % 60)
|
||||
milliseconds = int((timestamp % 1) * 1000) # 计算毫秒部分
|
||||
time_str = f"{hours:02d}{minutes:02d}{seconds:02d}{milliseconds:03d}"
|
||||
|
||||
output_path = os.path.join(output_folder,
|
||||
f"keyframe_{frame_number:06d}_{time_str}.jpg")
|
||||
@ -183,27 +191,34 @@ class VideoProcessor:
|
||||
else:
|
||||
logger.info(f"无法读取帧 {frame_number}")
|
||||
|
||||
logger.info(f"共提取了 {len(processed_seconds)} 个不同时间戳的帧")
|
||||
logger.info(f"共提取了 {len(processed_timestamps)} 个不同时间戳的帧")
|
||||
|
||||
@staticmethod
|
||||
def extract_numbers_from_folder(folder_path: str) -> List[int]:
|
||||
"""
|
||||
从文件夹中提取帧号
|
||||
|
||||
|
||||
Args:
|
||||
folder_path: 关键帧文件夹路径
|
||||
|
||||
|
||||
Returns:
|
||||
List[int]: 排序后的帧号列表
|
||||
"""
|
||||
files = [f for f in os.listdir(folder_path) if f.endswith('.jpg')]
|
||||
# 更新正则表达式以匹配新的文件名格式:keyframe_000123_010534.jpg
|
||||
pattern = re.compile(r'keyframe_(\d+)_\d+\.jpg$')
|
||||
# 更新正则表达式以匹配新的文件名格式:keyframe_000123_010534123.jpg
|
||||
pattern = re.compile(r'keyframe_(\d+)_\d{9}\.jpg$')
|
||||
numbers = []
|
||||
|
||||
for f in files:
|
||||
match = pattern.search(f)
|
||||
if match:
|
||||
numbers.append(int(match.group(1)))
|
||||
else:
|
||||
logger.warning(f"文件名格式不匹配: {f}")
|
||||
|
||||
if not numbers:
|
||||
logger.error(f"在目录 {folder_path} 中未找到有效的关键帧文件")
|
||||
|
||||
return sorted(numbers)
|
||||
|
||||
def process_video(self, output_dir: str, skip_seconds: float = 0, threshold: int = 30) -> None:
|
||||
@ -212,7 +227,7 @@ class VideoProcessor:
|
||||
|
||||
Args:
|
||||
output_dir: 输出目录
|
||||
skip_seconds: 跳过视频开头的秒数
|
||||
skip_seconds: 跳过视频开头的秒数
|
||||
"""
|
||||
skip_frames = int(skip_seconds * self.fps)
|
||||
|
||||
@ -240,11 +255,14 @@ class VideoProcessor:
|
||||
def process_video_pipeline(self,
|
||||
output_dir: str,
|
||||
skip_seconds: float = 0,
|
||||
threshold: int = 30,
|
||||
threshold: int = 20, # 降低默认阈值
|
||||
compressed_width: int = 320,
|
||||
keep_temp: bool = False) -> None:
|
||||
"""
|
||||
执行完整的视频处理流程:压缩、提取关键帧、导出高清帧
|
||||
执行完整的视频处理流程
|
||||
|
||||
Args:
|
||||
threshold: 降低默认阈值为20,使场景检测更敏感
|
||||
"""
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
temp_dir = os.path.join(output_dir, 'temp')
|
||||
@ -358,7 +376,7 @@ if __name__ == "__main__":
|
||||
import time
|
||||
|
||||
start_time = time.time()
|
||||
processor = VideoProcessor("best.mp4")
|
||||
processor.process_video_pipeline(output_dir="output4")
|
||||
processor = VideoProcessor("E:\\projects\\NarratoAI\\resource\\videos\\test.mp4")
|
||||
processor.process_video_pipeline(output_dir="output")
|
||||
end_time = time.time()
|
||||
print(f"处理完成!总耗时: {end_time - start_time:.2f} 秒")
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
[app]
|
||||
project_version="0.3.5"
|
||||
project_version="0.3.9"
|
||||
# 支持视频理解的大模型提供商
|
||||
# gemini
|
||||
# NarratoAPI
|
||||
|
||||
3
main.py
3
main.py
@ -1,3 +1,4 @@
|
||||
import os
|
||||
import uvicorn
|
||||
from loguru import logger
|
||||
|
||||
@ -7,6 +8,8 @@ if __name__ == "__main__":
|
||||
logger.info(
|
||||
"start server, docs: http://127.0.0.1:" + str(config.listen_port) + "/docs"
|
||||
)
|
||||
os.environ["HTTP_PROXY"] = config.proxy.get("http")
|
||||
os.environ["HTTPS_PROXY"] = config.proxy.get("https")
|
||||
uvicorn.run(
|
||||
app="app.asgi:app",
|
||||
host=config.listen_host,
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
requests~=2.31.0
|
||||
moviepy~=2.0.0.dev2
|
||||
moviepy==2.0.0.dev2
|
||||
faster-whisper~=1.0.1
|
||||
edge_tts~=6.1.15
|
||||
uvicorn~=0.27.1
|
||||
@ -26,9 +26,12 @@ psutil>=5.9.0
|
||||
opencv-python~=4.10.0.84
|
||||
scikit-learn~=1.5.2
|
||||
google-generativeai~=0.8.3
|
||||
Pillow>=11.0.0
|
||||
pillow==10.3.0
|
||||
python-dotenv~=1.0.1
|
||||
openai~=1.53.0
|
||||
tqdm>=4.66.6
|
||||
tenacity>=9.0.0
|
||||
tiktoken==0.8.0
|
||||
tiktoken==0.8.0
|
||||
yt-dlp==2024.11.18
|
||||
pysrt==1.1.2
|
||||
httpx==0.27.2
|
||||
|
||||
1
resource/fonts/fonts_in_here.txt
Normal file
1
resource/fonts/fonts_in_here.txt
Normal file
@ -0,0 +1 @@
|
||||
此处放字体文件
|
||||
0
resource/scripts/script_in_here.txt
Normal file
0
resource/scripts/script_in_here.txt
Normal file
0
resource/songs/song_in_here.txt
Normal file
0
resource/songs/song_in_here.txt
Normal file
0
resource/srt/srt_in_here.txt
Normal file
0
resource/srt/srt_in_here.txt
Normal file
0
resource/videos/video_in_here.txt
Normal file
0
resource/videos/video_in_here.txt
Normal file
178
video_pipeline.py
Normal file
178
video_pipeline.py
Normal file
@ -0,0 +1,178 @@
|
||||
import requests
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
from typing import Dict, Any
|
||||
|
||||
class VideoPipeline:
    """Client that drives the NarratoAI HTTP API through a full run.

    The remote steps are, in order: download a video, generate a narration
    script for it, crop the video into sub-clips, and start generation of the
    final video.
    """

    # Class-level fallback so that instances created without __init__ still
    # expose a timeout attribute for _post().
    timeout = None

    def __init__(self, base_url: str = "http://127.0.0.1:8080", timeout: Any = None):
        """Store the connection settings.

        Args:
            base_url: Root URL of the API server. Trailing slashes are removed
                so that joining it with "/api/..." never produces "//api".
            timeout: Value forwarded to ``requests.post(timeout=...)``. The
                default ``None`` keeps the previous behaviour of waiting
                indefinitely.
        """
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout

    def _post(self, path: str, payload: Dict[str, Any], params: Dict[str, Any] = None) -> Dict[str, Any]:
        """POST ``payload`` as JSON to ``path`` and return the decoded JSON body.

        Raises whatever ``response.raise_for_status()`` raises for an HTTP
        error status.
        """
        response = requests.post(
            f"{self.base_url}{path}",
            params=params,
            json=payload,
            timeout=self.timeout,
        )
        response.raise_for_status()
        return response.json()

    def download_video(self, url: str, resolution: str = "1080p",
                       output_format: str = "mp4", rename: str = None) -> Dict[str, Any]:
        """Step 1: ask the server to download a video.

        Args:
            url: Address of the video to download.
            resolution: Requested resolution.
            output_format: Requested container format.
            rename: Name for the downloaded file; when falsy, today's date
                formatted as YYYY-MM-DD is sent instead.

        Returns:
            The decoded JSON response.
        """
        payload = {
            "url": url,
            "resolution": resolution,
            "output_format": output_format,
            "rename": rename or time.strftime("%Y-%m-%d"),
        }
        return self._post("/api/v2/youtube/download", payload)

    def generate_script(self, video_path: str, skip_seconds: int = 0,
                        threshold: int = 30, vision_batch_size: int = 10,
                        vision_llm_provider: str = "gemini") -> Dict[str, Any]:
        """Step 2: ask the server to generate a script for ``video_path``.

        All arguments are forwarded unchanged under the same key names.

        Returns:
            The decoded JSON response.
        """
        payload = {
            "video_path": video_path,
            "skip_seconds": skip_seconds,
            "threshold": threshold,
            "vision_batch_size": vision_batch_size,
            "vision_llm_provider": vision_llm_provider,
        }
        return self._post("/api/v2/scripts/generate", payload)

    def crop_video(self, video_path: str, script: list) -> Dict[str, Any]:
        """Step 3: ask the server to cut ``video_path`` according to ``script``.

        Returns:
            The decoded JSON response.
        """
        payload = {
            "video_origin_path": video_path,
            "video_script": script,
        }
        return self._post("/api/v2/scripts/crop", payload)

    def generate_final_video(self, task_id: str, video_path: str,
                             script_path: str, script: list,
                             subclip_videos: Dict[str, str],
                             voice_name: str) -> Dict[str, Any]:
        """Step 4: start generation of the final video.

        ``task_id`` is sent as a query parameter; everything else goes in the
        JSON body. Apart from the script, the paths and the voice name, the
        rendering settings below are fixed values.

        Returns:
            The decoded JSON response.
        """
        request_data = {
            "video_clip_json": script,
            "video_clip_json_path": script_path,
            "video_origin_path": video_path,
            "video_aspect": "16:9",
            "video_language": "zh-CN",
            "voice_name": voice_name,
            "voice_volume": 1,
            "voice_rate": 1.2,
            "voice_pitch": 1,
            "bgm_name": "random",
            "bgm_type": "random",
            "bgm_file": "",
            "bgm_volume": 0.3,
            "subtitle_enabled": True,
            "subtitle_position": "bottom",
            "font_name": "STHeitiMedium.ttc",
            "text_fore_color": "#FFFFFF",
            "text_background_color": "transparent",
            "font_size": 75,
            "stroke_color": "#000000",
            "stroke_width": 1.5,
            "custom_position": 70,
            "n_threads": 8,
        }
        payload = {
            "request": request_data,
            "subclip_videos": subclip_videos,
        }
        return self._post(
            "/api/v2/scripts/start-subclip",
            payload,
            params={"task_id": task_id},
        )

    def save_script_to_json(self, script: list, script_path: str) -> str:
        """Write ``script`` to ``script_path`` as UTF-8 JSON.

        The parent directory is created when it is missing.

        Returns:
            ``script_path``.

        Raises:
            Exception: any failure is reported on stdout and then re-raised.
        """
        try:
            parent_dir = os.path.dirname(script_path)
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)
            with open(script_path, 'w', encoding='utf-8') as f:
                json.dump(script, f, ensure_ascii=False, indent=2)
            print(f"脚本已保存到: {script_path}")
            return script_path
        except Exception as e:
            print(f"保存脚本失败: {str(e)}")
            raise

    def run_pipeline(self, task_id: str, script_name: str, youtube_url: str, video_name: str="null", skip_seconds: int = 0, threshold: int = 30, vision_batch_size: int = 10, vision_llm_provider: str = "gemini", voice_name: str = "zh-CN-YunjianNeural") -> Dict[str, Any]:
        """Run all steps and report the outcome as a dict.

        The video is downloaded only when ``resource/videos/<video_name>.mp4``
        (next to this file) does not exist, and the script is generated only
        when ``resource/scripts/<script_name>`` does not exist.

        Returns:
            On success, a dict with a "status" message and the "path" of the
            task directory; final generation continues asynchronously on the
            server. On any exception, ``{"status": "error", "error": <text>}``
            — failures are reported this way rather than raised.
        """
        try:
            current_path = os.path.dirname(os.path.abspath(__file__))
            video_path = os.path.join(current_path, "resource", "videos", f"{video_name}.mp4")

            # 1. Download the video unless a local copy is already present.
            if not os.path.exists(video_path):
                print(f"视频不存在, 开始下载视频: {video_path}")
                download_result = self.download_video(url=youtube_url, resolution="1080p", output_format="mp4", rename=video_name)
                # Trust the server-reported location over the guessed one.
                video_path = download_result["output_path"]
            else:
                print(f"视频已存在: {video_path}")

            # 2. Reuse a saved script when there is one, otherwise generate it.
            script_path = os.path.join(current_path, "resource", "scripts", script_name)
            if os.path.exists(script_path):
                # Context manager so the handle is closed deterministically.
                with open(script_path, "r", encoding="utf-8") as f:
                    script = json.load(f)
            else:
                print("开始生成脚本...")
                script_result = self.generate_script(video_path=video_path, skip_seconds=skip_seconds, threshold=threshold, vision_batch_size=vision_batch_size, vision_llm_provider=vision_llm_provider)
                script = script_result["script"]

            # 2.2 Persist the script in both cases.
            # NOTE(review): assumed to run unconditionally, as the step
            # numbering suggests — confirm against the intended flow.
            print("保存脚本到json文件...")
            self.save_script_to_json(script=script, script_path=script_path)

            # 3. Crop the source video into sub-clips.
            print("开始剪辑视频...")
            crop_result = self.crop_video(video_path=video_path, script=script)
            subclip_videos = crop_result["subclip_videos"]

            # 4. Kick off final video generation.
            print("开始生成最终视频...")
            self.generate_final_video(
                task_id=task_id,
                video_path=video_path,
                script_path=script_path,
                script=script,
                subclip_videos=subclip_videos,
                voice_name=voice_name
            )

            return {
                "status": "等待异步生成视频",
                "path": os.path.join(current_path, "storage", "tasks", task_id)
            }

        except Exception as e:
            # Deliberate boundary: callers receive an error dict, not a raise.
            return {
                "status": "error",
                "error": str(e)
            }
|
||||
|
||||
|
||||
# Usage example: runs the whole pipeline once with sample arguments and
# prints the resulting status dict.
if __name__ == "__main__":
    example_arguments = {
        "task_id": "test_111901",
        "script_name": "test.json",
        "youtube_url": "https://www.youtube.com/watch?v=vLJ7Yed6FQ4",
        "video_name": "2024-11-19-01",
        "skip_seconds": 50,
        "threshold": 35,
        "vision_batch_size": 10,
        "vision_llm_provider": "gemini",
        "voice_name": "zh-CN-YunjianNeural",
    }
    runner = VideoPipeline()
    outcome = runner.run_pipeline(**example_arguments)
    print(outcome)
|
||||
8
webui.py
8
webui.py
@ -3,7 +3,7 @@ import os
|
||||
import sys
|
||||
from uuid import uuid4
|
||||
from app.config import config
|
||||
from webui.components import basic_settings, video_settings, audio_settings, subtitle_settings, script_settings, review_settings
|
||||
from webui.components import basic_settings, video_settings, audio_settings, subtitle_settings, script_settings, review_settings, merge_settings, system_settings
|
||||
from webui.utils import cache, file_utils
|
||||
from app.utils import utils
|
||||
from app.models.schema import VideoClipParams, VideoAspect
|
||||
@ -178,7 +178,9 @@ def main():
|
||||
|
||||
# 渲染基础设置面板
|
||||
basic_settings.render_basic_settings(tr)
|
||||
|
||||
# 渲染合并设置
|
||||
merge_settings.render_merge_settings(tr)
|
||||
|
||||
# 渲染主面板
|
||||
panel = st.columns(3)
|
||||
with panel[0]:
|
||||
@ -188,6 +190,8 @@ def main():
|
||||
audio_settings.render_audio_panel(tr)
|
||||
with panel[2]:
|
||||
subtitle_settings.render_subtitle_panel(tr)
|
||||
# 渲染系统设置面板
|
||||
system_settings.render_system_panel(tr)
|
||||
|
||||
# 渲染视频审查面板
|
||||
review_settings.render_review_panel(tr)
|
||||
|
||||
325
webui.txt
325
webui.txt
@ -47,3 +47,328 @@ pause
|
||||
|
||||
rem set HF_ENDPOINT=https://hf-mirror.com
|
||||
streamlit run webui.py --browser.serverAddress="127.0.0.1" --server.enableCORS=True --server.maxUploadSize=2048 --browser.gatherUsageStats=False
|
||||
|
||||
请求0:
|
||||
curl -X 'POST' \
|
||||
'http://127.0.0.1:8080/api/v2/youtube/download' \
|
||||
-H 'accept: application/json' \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{
|
||||
"url": "https://www.youtube.com/watch?v=Kenm35gdqtk",
|
||||
"resolution": "1080p",
|
||||
"output_format": "mp4",
|
||||
"rename": "2024-11-19"
|
||||
}'
|
||||
{
|
||||
"url": "https://www.youtube.com/watch?v=Kenm35gdqtk",
|
||||
"resolution": "1080p",
|
||||
"output_format": "mp4",
|
||||
"rename": "2024-11-19"
|
||||
}
|
||||
|
||||
请求1:
|
||||
curl -X 'POST' \
|
||||
'http://127.0.0.1:8080/api/v2/scripts/generate' \
|
||||
-H 'accept: application/json' \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{
|
||||
"video_path": "E:\\projects\\NarratoAI\\resource\\videos\\test.mp4",
|
||||
"skip_seconds": 0,
|
||||
"threshold": 30,
|
||||
"vision_batch_size": 10,
|
||||
"vision_llm_provider": "gemini"
|
||||
}'
|
||||
{
|
||||
"video_path": "E:\\projects\\NarratoAI\\resource\\videos\\test.mp4",
|
||||
"skip_seconds": 0,
|
||||
"threshold": 30,
|
||||
"vision_batch_size": 10,
|
||||
"vision_llm_provider": "gemini"
|
||||
}
|
||||
|
||||
请求2:
|
||||
curl -X 'POST' \
|
||||
'http://127.0.0.1:8080/api/v2/scripts/crop' \
|
||||
-H 'accept: application/json' \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{
|
||||
"video_origin_path": "E:\\projects\\NarratoAI\\resource\\videos\\test.mp4",
|
||||
"video_script": [
|
||||
{
|
||||
"timestamp": "00:10-01:01",
|
||||
"picture": "好的,以下是视频画面的客观描述:\n\n视频展现一名留着胡须的男子在森林里挖掘。\n\n画面首先展现男子从后方视角,背着军绿色背包,穿着卡其色长裤和深色T恤,走向一个泥土斜坡。背包上似乎有一个镐头。\n\n下一个镜头特写展现了该背包,一个镐头从背包里伸出来,包里还有一些其他工具。\n\n然后,视频显示该男子用镐头挖掘泥土斜坡。\n\n接下来是一些近景镜头,展现男子的靴子在泥土中行走,以及男子用手清理泥土。\n\n其他镜头从不同角度展现该男子在挖掘,包括从侧面和上方。\n\n可以看到他用工具挖掘,清理泥土,并检查挖出的土壤。\n\n最后,一个镜头展现了挖出的土壤的质地和颜色。",
|
||||
"narration": "好的,接下来就是我们这位“胡须大侠”的精彩冒险了!只见他背着军绿色的背包,迈着比我上班还不情愿的步伐走向那泥土斜坡。哎呀,这个背包可真是个宝贝,里面藏着一把镐头和一些工具,简直像是个随身携带的“建筑工具箱”! \n\n看他挥舞着镐头,挖掘泥土的姿势,仿佛在进行一场“挖土大赛”,结果却比我做饭还要糟糕。泥土飞扬中,他的靴子也成了“泥巴艺术家”。最后,那堆色泽各异的土壤就像他心情的写照——五彩斑斓又略显混乱!真是一次让人捧腹的建造之旅!",
|
||||
"OST": 2,
|
||||
"new_timestamp": "00:00-00:51"
|
||||
},
|
||||
{
|
||||
"timestamp": "01:07-01:53",
|
||||
"picture": "好的,以下是视频画面的客观描述:\n\n视频以一系列森林环境的镜头开头。\n\n第一个镜头是一个特写镜头,镜头中显示的是一些带有水滴的绿色叶子。\n\n第二个镜头显示一个留着胡须的男子在森林中挖掘一个洞。 他跪在地上,用工具挖土。\n\n第三个镜头是一个中等镜头,显示同一个人坐在他挖好的洞边休息。\n\n第四个镜头显示该洞的内部结构,该洞在树根和地面之间。\n\n第五个镜头显示该男子用斧头砍树枝。\n\n第六个镜头显示一堆树枝横跨一个泥泞的小水坑。\n\n第七个镜头显示更多茂盛的树叶和树枝在阳光下。\n\n第八个镜头显示更多茂盛的树叶和树枝。\n\n\n",
|
||||
"narration": "接下来,我们的“挖土大师”又开始了他的森林探险。看这镜头,水滴在叶子上闪烁,仿佛在说:“快来,快来,这里有故事!”他一边挖洞,一边像个新手厨师试图切洋葱——每一下都小心翼翼,生怕自己不小心挖出个“历史遗址”。坐下休息的时候,脸上的表情就像发现新大陆一样!然后,他拿起斧头砍树枝,简直是现代版的“神雕侠侣”,只不过对象是树木。最后,那堆树枝架过泥泞的小水坑,仿佛在说:“我就是不怕湿脚的勇士!”这就是我们的建造之旅!",
|
||||
"OST": 2,
|
||||
"new_timestamp": "00:51-01:37"
|
||||
}
|
||||
]
|
||||
}'
|
||||
{
|
||||
"video_origin_path": "E:\\projects\\NarratoAI\\resource\\videos\\test.mp4",
|
||||
"video_script": [
|
||||
{
|
||||
"timestamp": "00:10-01:01",
|
||||
"picture": "好的,以下是视频画面的客观描述:\n\n视频展现一名留着胡须的男子在森林里挖掘。\n\n画面首先展现男子从后方视角,背着军绿色背包,穿着卡其色长裤和深色T恤,走向一个泥土斜坡。背包上似乎有一个镐头。\n\n下一个镜头特写展现了该背包,一个镐头从背包里伸出来,包里还有一些其他工具。\n\n然后,视频显示该男子用镐头挖掘泥土斜坡。\n\n接下来是一些近景镜头,展现男子的靴子在泥土中行走,以及男子用手清理泥土。\n\n其他镜头从不同角度展现该男子在挖掘,包括从侧面和上方。\n\n可以看到他用工具挖掘,清理泥土,并检查挖出的土壤。\n\n最后,一个镜头展现了挖出的土壤的质地和颜色。",
|
||||
"narration": "好的,接下来就是我们这位“胡须大侠”的精彩冒险了!只见他背着军绿色的背包,迈着比我上班还不情愿的步伐走向那泥土斜坡。哎呀,这个背包可真是个宝贝,里面藏着一把镐头和一些工具,简直像是个随身携带的“建筑工具箱”! \n\n看他挥舞着镐头,挖掘泥土的姿势,仿佛在进行一场“挖土大赛”,结果却比我做饭还要糟糕。泥土飞扬中,他的靴子也成了“泥巴艺术家”。最后,那堆色泽各异的土壤就像他心情的写照——五彩斑斓又略显混乱!真是一次让人捧腹的建造之旅!",
|
||||
"OST": 2,
|
||||
"new_timestamp": "00:00-00:51"
|
||||
},
|
||||
{
|
||||
"timestamp": "01:07-01:53",
|
||||
"picture": "好的,以下是视频画面的客观描述:\n\n视频以一系列森林环境的镜头开头。\n\n第一个镜头是一个特写镜头,镜头中显示的是一些带有水滴的绿色叶子。\n\n第二个镜头显示一个留着胡须的男子在森林中挖掘一个洞。 他跪在地上,用工具挖土。\n\n第三个镜头是一个中等镜头,显示同一个人坐在他挖好的洞边休息。\n\n第四个镜头显示该洞的内部结构,该洞在树根和地面之间。\n\n第五个镜头显示该男子用斧头砍树枝。\n\n第六个镜头显示一堆树枝横跨一个泥泞的小水坑。\n\n第七个镜头显示更多茂盛的树叶和树枝在阳光下。\n\n第八个镜头显示更多茂盛的树叶和树枝。\n\n\n",
|
||||
"narration": "接下来,我们的“挖土大师”又开始了他的森林探险。看这镜头,水滴在叶子上闪烁,仿佛在说:“快来,快来,这里有故事!”他一边挖洞,一边像个新手厨师试图切洋葱——每一下都小心翼翼,生怕自己不小心挖出个“历史遗址”。坐下休息的时候,脸上的表情就像发现新大陆一样!然后,他拿起斧头砍树枝,简直是现代版的“神雕侠侣”,只不过对象是树木。最后,那堆树枝架过泥泞的小水坑,仿佛在说:“我就是不怕湿脚的勇士!”这就是我们的建造之旅!",
|
||||
"OST": 2,
|
||||
"new_timestamp": "00:51-01:37"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
请求3:
|
||||
curl -X 'POST' \
|
||||
'http://127.0.0.1:8080/api/v2/scripts/start-subclip?task_id=12121' \
|
||||
-H 'accept: application/json' \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{
|
||||
"request": {
|
||||
"video_clip_json": [
|
||||
{
|
||||
"timestamp": "00:10-01:01",
|
||||
"picture": "好的,以下是视频画面的客观描述:\n\n视频展现一名留着胡须的男子在森林里挖掘。\n\n画面首先展现男子从后方视角,背着军绿色背包,穿着卡其色长裤和深色T恤,走向一个泥土斜坡。背包上似乎有一个镐头。\n\n下一个镜头特写展现了该背包,一个镐头从背包里伸出来,包里还有一些其他工具。\n\n然后,视频显示该男子用镐头挖掘泥土斜坡。\n\n接下来是一些近景镜头,展现男子的靴子在泥土中行走,以及男子用手清理泥土。\n\n其他镜头从不同角度展现该男子在挖掘,包括从侧面和上方。\n\n可以看到他用工具挖掘,清理泥土,并检查挖出的土壤。\n\n最后,一个镜头展现了挖出的土壤的质地和颜色。",
|
||||
"narration": "好的,接下来就是我们这位“胡须大侠”的精彩冒险了!只见他背着军绿色的背包,迈着比我上班还不情愿的步伐走向那泥土斜坡。哎呀,这个背包可真是个宝贝,里面藏着一把镐头和一些工具,简直像是个随身携带的“建筑工具箱”! \n\n看他挥舞着镐头,挖掘泥土的姿势,仿佛在进行一场“挖土大赛”,结果却比我做饭还要糟糕。泥土飞扬中,他的靴子也成了“泥巴艺术家”。最后,那堆色泽各异的土壤就像他心情的写照——五彩斑斓又略显混乱!真是一次让人捧腹的建造之旅!",
|
||||
"OST": 2,
|
||||
"new_timestamp": "00:00-00:51"
|
||||
},
|
||||
{
|
||||
"timestamp": "01:07-01:53",
|
||||
"picture": "好的,以下是视频画面的客观描述:\n\n视频以一系列森林环境的镜头开头。\n\n第一个镜头是一个特写镜头,镜头中显示的是一些带有水滴的绿色叶子。\n\n第二个镜头显示一个留着胡须的男子在森林中挖掘一个洞。 他跪在地上,用工具挖土。\n\n第三个镜头是一个中等镜头,显示同一个人坐在他挖好的洞边休息。\n\n第四个镜头显示该洞的内部结构,该洞在树根和地面之间。\n\n第五个镜头显示该男子用斧头砍树枝。\n\n第六个镜头显示一堆树枝横跨一个泥泞的小水坑。\n\n第七个镜头显示更多茂盛的树叶和树枝在阳光下。\n\n第八个镜头显示更多茂盛的树叶和树枝。\n\n\n",
|
||||
"narration": "接下来,我们的“挖土大师”又开始了他的森林探险。看这镜头,水滴在叶子上闪烁,仿佛在说:“快来,快来,这里有故事!”他一边挖洞,一边像个新手厨师试图切洋葱——每一下都小心翼翼,生怕自己不小心挖出个“历史遗址”。坐下休息的时候,脸上的表情就像发现新大陆一样!然后,他拿起斧头砍树枝,简直是现代版的“神雕侠侣”,只不过对象是树木。最后,那堆树枝架过泥泞的小水坑,仿佛在说:“我就是不怕湿脚的勇士!”这就是我们的建造之旅!",
|
||||
"OST": 2,
|
||||
"new_timestamp": "00:51-01:37"
|
||||
}
|
||||
],
|
||||
"video_clip_json_path": "E:\\projects\\NarratoAI\\resource\\scripts\\2024-1118-230421.json",
|
||||
"video_origin_path": "E:\\projects\\NarratoAI\\resource\\videos\\test.mp4",
|
||||
"video_aspect": "16:9",
|
||||
"video_language": "zh-CN",
|
||||
"voice_name": "zh-CN-YunjianNeural",
|
||||
"voice_volume": 1,
|
||||
"voice_rate": 1.2,
|
||||
"voice_pitch": 1,
|
||||
"bgm_name": "random",
|
||||
"bgm_type": "random",
|
||||
"bgm_file": "",
|
||||
"bgm_volume": 0.3,
|
||||
"subtitle_enabled": true,
|
||||
"subtitle_position": "bottom",
|
||||
"font_name": "STHeitiMedium.ttc",
|
||||
"text_fore_color": "#FFFFFF",
|
||||
"text_background_color": "transparent",
|
||||
"font_size": 75,
|
||||
"stroke_color": "#000000",
|
||||
"stroke_width": 1.5,
|
||||
"custom_position": 70,
|
||||
"n_threads": 8
|
||||
},
|
||||
"subclip_videos": {
|
||||
"00:10-01:01": "E:\\projects\\NarratoAI\\storage\\cache_videos/vid-00_10-01_01.mp4",
|
||||
"01:07-01:53": "E:\\projects\\NarratoAI\\storage\\cache_videos/vid-01_07-01_53.mp4"
|
||||
}
|
||||
}'
|
||||
{
|
||||
"request": {
|
||||
"video_clip_json": [
|
||||
{
|
||||
"timestamp": "00:10-01:01",
|
||||
"picture": "好的,以下是视频画面的客观描述:\n\n视频展现一名留着胡须的男子在森林里挖掘。\n\n画面首先展现男子从后方视角,背着军绿色背包,穿着卡其色长裤和深色T恤,走向一个泥土斜坡。背包上似乎有一个镐头。\n\n下一个镜头特写展现了该背包,一个镐头从背包里伸出来,包里还有一些其他工具。\n\n然后,视频显示该男子用镐头挖掘泥土斜坡。\n\n接下来是一些近景镜头,展现男子的靴子在泥土中行走,以及男子用手清理泥土。\n\n其他镜头从不同角度展现该男子在挖掘,包括从侧面和上方。\n\n可以看到他用工具挖掘,清理泥土,并检查挖出的土壤。\n\n最后,一个镜头展现了挖出的土壤的质地和颜色。",
|
||||
"narration": "好的,接下来就是我们这位“胡须大侠”的精彩冒险了!只见他背着军绿色的背包,迈着比我上班还不情愿的步伐走向那泥土斜坡。哎呀,这个背包可真是个宝贝,里面藏着一把镐头和一些工具,简直像是个随身携带的“建筑工具箱”! \n\n看他挥舞着镐头,挖掘泥土的姿势,仿佛在进行一场“挖土大赛”,结果却比我做饭还要糟糕。泥土飞扬中,他的靴子也成了“泥巴艺术家”。最后,那堆色泽各异的土壤就像他心情的写照——五彩斑斓又略显混乱!真是一次让人捧腹的建造之旅!",
|
||||
"OST": 2,
|
||||
"new_timestamp": "00:00-00:51"
|
||||
},
|
||||
{
|
||||
"timestamp": "01:07-01:53",
|
||||
"picture": "好的,以下是视频画面的客观描述:\n\n视频以一系列森林环境的镜头开头。\n\n第一个镜头是一个特写镜头,镜头中显示的是一些带有水滴的绿色叶子。\n\n第二个镜头显示一个留着胡须的男子在森林中挖掘一个洞。 他跪在地上,用工具挖土。\n\n第三个镜头是一个中等镜头,显示同一个人坐在他挖好的洞边休息。\n\n第四个镜头显示该洞的内部结构,该洞在树根和地面之间。\n\n第五个镜头显示该男子用斧头砍树枝。\n\n第六个镜头显示一堆树枝横跨一个泥泞的小水坑。\n\n第七个镜头显示更多茂盛的树叶和树枝在阳光下。\n\n第八个镜头显示更多茂盛的树叶和树枝。\n\n\n",
|
||||
"narration": "接下来,我们的“挖土大师”又开始了他的森林探险。看这镜头,水滴在叶子上闪烁,仿佛在说:“快来,快来,这里有故事!”他一边挖洞,一边像个新手厨师试图切洋葱——每一下都小心翼翼,生怕自己不小心挖出个“历史遗址”。坐下休息的时候,脸上的表情就像发现新大陆一样!然后,他拿起斧头砍树枝,简直是现代版的“神雕侠侣”,只不过对象是树木。最后,那堆树枝架过泥泞的小水坑,仿佛在说:“我就是不怕湿脚的勇士!”这就是我们的建造之旅!",
|
||||
"OST": 2,
|
||||
"new_timestamp": "00:51-01:37"
|
||||
}
|
||||
],
|
||||
"video_clip_json_path": "E:\\projects\\NarratoAI\\resource\\scripts\\2024-1118-230421.json",
|
||||
"video_origin_path": "E:\\projects\\NarratoAI\\resource\\videos\\test.mp4",
|
||||
"video_aspect": "16:9",
|
||||
"video_language": "zh-CN",
|
||||
"voice_name": "zh-CN-YunjianNeural",
|
||||
"voice_volume": 1,
|
||||
"voice_rate": 1.2,
|
||||
"voice_pitch": 1,
|
||||
"bgm_name": "random",
|
||||
"bgm_type": "random",
|
||||
"bgm_file": "",
|
||||
"bgm_volume": 0.3,
|
||||
"subtitle_enabled": true,
|
||||
"subtitle_position": "bottom",
|
||||
"font_name": "STHeitiMedium.ttc",
|
||||
"text_fore_color": "#FFFFFF",
|
||||
"text_background_color": "transparent",
|
||||
"font_size": 75,
|
||||
"stroke_color": "#000000",
|
||||
"stroke_width": 1.5,
|
||||
"custom_position": 70,
|
||||
"n_threads": 8
|
||||
},
|
||||
"subclip_videos": {
|
||||
"00:10-01:01": "E:\\projects\\NarratoAI\\storage\\cache_videos/vid-00_10-01_01.mp4",
|
||||
"01:07-01:53": "E:\\projects\\NarratoAI\\storage\\cache_videos/vid-01_07-01_53.mp4"
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
请在最外层新建一个pipeline 工作流执行逻辑的代码;
|
||||
它会按照下面的顺序请求接口
|
||||
1.下载视频
|
||||
curl -X 'POST' \
|
||||
'http://127.0.0.1:8080/api/v2/youtube/download' \
|
||||
-H 'accept: application/json' \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{
|
||||
"url": "https://www.youtube.com/watch?v=Kenm35gdqtk",
|
||||
"resolution": "1080p",
|
||||
"output_format": "mp4",
|
||||
"rename": "2024-11-19"
|
||||
}'
|
||||
2.生成脚本
|
||||
curl -X 'POST' \
|
||||
'http://127.0.0.1:8080/api/v2/scripts/generate' \
|
||||
-H 'accept: application/json' \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{
|
||||
"video_path": "E:\\projects\\NarratoAI\\resource\\videos\\test.mp4",
|
||||
"skip_seconds": 0,
|
||||
"threshold": 30,
|
||||
"vision_batch_size": 10,
|
||||
"vision_llm_provider": "gemini"
|
||||
}'
|
||||
3. 剪辑视频
|
||||
curl -X 'POST' \
|
||||
'http://127.0.0.1:8080/api/v2/scripts/crop' \
|
||||
-H 'accept: application/json' \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{
|
||||
"video_origin_path": "E:\\projects\\NarratoAI\\resource\\videos\\test.mp4",
|
||||
"video_script": [
|
||||
{
|
||||
"timestamp": "00:10-01:01",
|
||||
"picture": "好的,以下是视频画面的客观描述:\n\n视频展现一名留着胡须的男子在森林里挖掘。\n\n画面首先展现男子从后方视角,背着军绿色背包,穿着卡其色长裤和深色T恤,走向一个泥土斜坡。背包上似乎有一个镐头。\n\n下一个镜头特写展现了该背包,一个镐头从背包里伸出来,包里还有一些其他工具。\n\n然后,视频显示该男子用镐头挖掘泥土斜坡。\n\n接下来是一些近景镜头,展现男子的靴子在泥土中行走,以及男子用手清理泥土。\n\n其他镜头从不同角度展现该男子在挖掘,包括从侧面和上方。\n\n可以看到他用工具挖掘,清理泥土,并检查挖出的土壤。\n\n最后,一个镜头展现了挖出的土壤的质地和颜色。",
|
||||
"narration": "好的,接下来就是我们这位“胡须大侠”的精彩冒险了!只见他背着军绿色的背包,迈着比我上班还不情愿的步伐走向那泥土斜坡。哎呀,这个背包可真是个宝贝,里面藏着一把镐头和一些工具,简直像是个随身携带的“建筑工具箱”! \n\n看他挥舞着镐头,挖掘泥土的姿势,仿佛在进行一场“挖土大赛”,结果却比我做饭还要糟糕。泥土飞扬中,他的靴子也成了“泥巴艺术家”。最后,那堆色泽各异的土壤就像他心情的写照——五彩斑斓又略显混乱!真是一次让人捧腹的建造之旅!",
|
||||
"OST": 2,
|
||||
"new_timestamp": "00:00-00:51"
|
||||
},
|
||||
{
|
||||
"timestamp": "01:07-01:53",
|
||||
"picture": "好的,以下是视频画面的客观描述:\n\n视频以一系列森林环境的镜头开头。\n\n第一个镜头是一个特写镜头,镜头中显示的是一些带有水滴的绿色叶子。\n\n第二个镜头显示一个留着胡须的男子在森林中挖掘一个洞。 他跪在地上,用工具挖土。\n\n第三个镜头是一个中等镜头,显示同一个人坐在他挖好的洞边休息。\n\n第四个镜头显示该洞的内部结构,该洞在树根和地面之间。\n\n第五个镜头显示该男子用斧头砍树枝。\n\n第六个镜头显示一堆树枝横跨一个泥泞的小水坑。\n\n第七个镜头显示更多茂盛的树叶和树枝在阳光下。\n\n第八个镜头显示更多茂盛的树叶和树枝。\n\n\n",
|
||||
"narration": "接下来,我们的“挖土大师”又开始了他的森林探险。看这镜头,水滴在叶子上闪烁,仿佛在说:“快来,快来,这里有故事!”他一边挖洞,一边像个新手厨师试图切洋葱——每一下都小心翼翼,生怕自己不小心挖出个“历史遗址”。坐下休息的时候,脸上的表情就像发现新大陆一样!然后,他拿起斧头砍树枝,简直是现代版的“神雕侠侣”,只不过对象是树木。最后,那堆树枝架过泥泞的小水坑,仿佛在说:“我就是不怕湿脚的勇士!”这就是我们的建造之旅!",
|
||||
"OST": 2,
|
||||
"new_timestamp": "00:51-01:37"
|
||||
}
|
||||
]
|
||||
}'
|
||||
4.生成视频
|
||||
curl -X 'POST' \
|
||||
'http://127.0.0.1:8080/api/v2/scripts/start-subclip?task_id=12121' \
|
||||
-H 'accept: application/json' \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{
|
||||
"request": {
|
||||
"video_clip_json": [
|
||||
{
|
||||
"timestamp": "00:10-01:01",
|
||||
"picture": "好的,以下是视频画面的客观描述:\n\n视频展现一名留着胡须的男子在森林里挖掘。\n\n画面首先展现男子从后方视角,背着军绿色背包,穿着卡其色长裤和深色T恤,走向一个泥土斜坡。背包上似乎有一个镐头。\n\n下一个镜头特写展现了该背包,一个镐头从背包里伸出来,包里还有一些其他工具。\n\n然后,视频显示该男子用镐头挖掘泥土斜坡。\n\n接下来是一些近景镜头,展现男子的靴子在泥土中行走,以及男子用手清理泥土。\n\n其他镜头从不同角度展现该男子在挖掘,包括从侧面和上方。\n\n可以看到他用工具挖掘,清理泥土,并检查挖出的土壤。\n\n最后,一个镜头展现了挖出的土壤的质地和颜色。",
|
||||
"narration": "好的,接下来就是我们这位“胡须大侠”的精彩冒险了!只见他背着军绿色的背包,迈着比我上班还不情愿的步伐走向那泥土斜坡。哎呀,这个背包可真是个宝贝,里面藏着一把镐头和一些工具,简直像是个随身携带的“建筑工具箱”! \n\n看他挥舞着镐头,挖掘泥土的姿势,仿佛在进行一场“挖土大赛”,结果却比我做饭还要糟糕。泥土飞扬中,他的靴子也成了“泥巴艺术家”。最后,那堆色泽各异的土壤就像他心情的写照——五彩斑斓又略显混乱!真是一次让人捧腹的建造之旅!",
|
||||
"OST": 2,
|
||||
"new_timestamp": "00:00-00:51"
|
||||
},
|
||||
{
|
||||
"timestamp": "01:07-01:53",
|
||||
"picture": "好的,以下是视频画面的客观描述:\n\n视频以一系列森林环境的镜头开头。\n\n第一个镜头是一个特写镜头,镜头中显示的是一些带有水滴的绿色叶子。\n\n第二个镜头显示一个留着胡须的男子在森林中挖掘一个洞。 他跪在地上,用工具挖土。\n\n第三个镜头是一个中等镜头,显示同一个人坐在他挖好的洞边休息。\n\n第四个镜头显示该洞的内部结构,该洞在树根和地面之间。\n\n第五个镜头显示该男子用斧头砍树枝。\n\n第六个镜头显示一堆树枝横跨一个泥泞的小水坑。\n\n第七个镜头显示更多茂盛的树叶和树枝在阳光下。\n\n第八个镜头显示更多茂盛的树叶和树枝。\n\n\n",
|
||||
"narration": "接下来,我们的“挖土大师”又开始了他的森林探险。看这镜头,水滴在叶子上闪烁,仿佛在说:“快来,快来,这里有故事!”他一边挖洞,一边像个新手厨师试图切洋葱——每一下都小心翼翼,生怕自己不小心挖出个“历史遗址”。坐下休息的时候,脸上的表情就像发现新大陆一样!然后,他拿起斧头砍树枝,简直是现代版的“神雕侠侣”,只不过对象是树木。最后,那堆树枝架过泥泞的小水坑,仿佛在说:“我就是不怕湿脚的勇士!”这就是我们的建造之旅!",
|
||||
"OST": 2,
|
||||
"new_timestamp": "00:51-01:37"
|
||||
}
|
||||
],
|
||||
"video_clip_json_path": "E:\\projects\\NarratoAI\\resource\\scripts\\2024-1118-230421.json",
|
||||
"video_origin_path": "E:\\projects\\NarratoAI\\resource\\videos\\test.mp4",
|
||||
"video_aspect": "16:9",
|
||||
"video_language": "zh-CN",
|
||||
"voice_name": "zh-CN-YunjianNeural",
|
||||
"voice_volume": 1,
|
||||
"voice_rate": 1.2,
|
||||
"voice_pitch": 1,
|
||||
"bgm_name": "random",
|
||||
"bgm_type": "random",
|
||||
"bgm_file": "",
|
||||
"bgm_volume": 0.3,
|
||||
"subtitle_enabled": true,
|
||||
"subtitle_position": "bottom",
|
||||
"font_name": "STHeitiMedium.ttc",
|
||||
"text_fore_color": "#FFFFFF",
|
||||
"text_background_color": "transparent",
|
||||
"font_size": 75,
|
||||
"stroke_color": "#000000",
|
||||
"stroke_width": 1.5,
|
||||
"custom_position": 70,
|
||||
"n_threads": 8
|
||||
},
|
||||
"subclip_videos": {
|
||||
"00:10-01:01": "E:\\projects\\NarratoAI\\storage\\cache_videos/vid-00_10-01_01.mp4",
|
||||
"01:07-01:53": "E:\\projects\\NarratoAI\\storage\\cache_videos/vid-01_07-01_53.mp4"
|
||||
}
|
||||
}'
|
||||
|
||||
请求1,返回的参数是:
|
||||
{
|
||||
"task_id": "4e9b575f-68c0-4ae1-b218-db42b67993d0",
|
||||
"output_path": "E:\\projects\\NarratoAI\\resource\\videos\\2024-11-19.mp4",
|
||||
"resolution": "1080p",
|
||||
"format": "mp4",
|
||||
"filename": "2024-11-19.mp4"
|
||||
}
|
||||
output_path需要传递给请求2
|
||||
请求2,返回数据为:
|
||||
{
|
||||
"task_id": "04497017-953c-44b4-bf1d-9d8ed3ebbbce",
|
||||
"script": [
|
||||
{
|
||||
"timestamp": "00:10-01:01",
|
||||
"picture": "好的,以下是對影片畫面的客觀描述:\n\n影片顯示一名留著鬍鬚的男子在一處樹林茂密的斜坡上挖掘。\n\n畫面一:男子從後方出現,背著一個軍綠色的背包,背包裡似乎裝有工具。他穿著卡其色的長褲和深色的登山鞋。\n\n畫面二:特寫鏡頭顯示男子的背包,一個舊的鎬頭從包裡露出來,包裡還有其他工具,包括一個鏟子。\n\n畫面三:男子用鎬頭在斜坡上挖土,背包放在他旁邊。\n\n畫面四:特寫鏡頭顯示男子的登山鞋在泥土中。\n\n畫面五:男子坐在斜坡上,用手清理樹根和泥土。\n\n畫面六:地上有一些鬆動的泥土和落葉。\n\n畫面七:男子的背包近景鏡頭,他正在挖掘。\n\n畫面八:男子在斜坡上挖掘,揚起一陣塵土。\n\n畫面九:特寫鏡頭顯示男子用手清理泥土。\n\n畫面十:特寫鏡頭顯示挖出的泥土剖面,可以看到土壤的層次。",
|
||||
"narration": "上一个画面是我在绝美的自然中,准备开启我的“土豪”挖掘之旅。现在,你们看到这位留着胡子的“大哥”,他背着个军绿色的包,里面装的可不仅仅是工具,还有我对生活的无限热爱(以及一丝不安)。看!这把旧镐头就像我的前任——用起来费劲,但又舍不得扔掉。\n\n他在斜坡上挖土,泥土飞扬,仿佛在跟大地进行一场“泥巴大战”。每一铲下去,都能听到大地微微的呻吟:哎呀,我这颗小树根可比我当年的情感纠葛还难处理呢!别担心,这些泥土层次分明,简直可以开个“泥土博物馆”。所以,朋友们,跟着我一起享受这场泥泞中的乐趣吧!",
|
||||
"OST": 2,
|
||||
"new_timestamp": "00:00-00:51"
|
||||
},
|
||||
{
|
||||
"timestamp": "01:07-01:53",
|
||||
"picture": "好的,以下是對影片畫面內容的客觀描述:\n\n影片以一系列森林環境的鏡頭開始。第一個鏡頭展示了綠葉植物的特寫鏡頭,葉子上有一些水珠。接下來的鏡頭是一個男人在森林裡挖掘一個小坑,他跪在地上,用鏟子挖土。\n\n接下來的鏡頭是同一個男人坐在他挖的坑旁邊,望著前方。然後,鏡頭顯示該坑的廣角鏡頭,顯示其結構和大小。\n\n之後的鏡頭,同一個男人在樹林裡劈柴。鏡頭最後呈現出一潭渾濁的水,周圍環繞著樹枝。然後鏡頭又回到了森林裡生長茂盛的植物特寫鏡頭。",
|
||||
"narration": "好嘞,朋友们,我们已经在泥土博物馆里捣鼓了一阵子,现在是时候跟大自然亲密接触了!看看这片森林,绿叶上水珠闪闪发光,就像我曾经的爱情,虽然短暂,却美得让人心碎。\n\n现在,我在这里挖个小坑,感觉自己就像是一位新晋“挖土大王”,不过说实话,这手艺真不敢恭维,连铲子都快对我崩溃了。再说劈柴,这动作简直比我前任的情绪波动还要激烈!最后这一潭浑浊的水,别担心,它只是告诉我:生活就像这水,总有些杂质,但也别忘了,要勇敢面对哦!",
|
||||
"OST": 2,
|
||||
"new_timestamp": "00:51-01:37"
|
||||
}
|
||||
]
|
||||
}
|
||||
output_path和script参数需要传递给请求3
|
||||
请求3返回参数是
|
||||
{
|
||||
"task_id": "b6f5a98a-b2e0-4e3d-89c5-64fb90db2ec1",
|
||||
"subclip_videos": {
|
||||
"00:10-01:01": "E:\\projects\\NarratoAI\\storage\\cache_videos/vid-00_10-01_01.mp4",
|
||||
"01:07-01:53": "E:\\projects\\NarratoAI\\storage\\cache_videos/vid-01_07-01_53.mp4"
|
||||
}
|
||||
}
|
||||
subclip_videos和 output_path和script参数需要传递给请求4
|
||||
最后完成工作流
|
||||
|
||||
0代表只播放文案音频,禁用视频原声;1代表只播放视频原声,不需要播放文案音频和字幕;2代表既播放文案音频也要播放视频原声;
|
||||
@ -20,7 +20,7 @@ def render_audio_panel(tr):
|
||||
def render_tts_settings(tr):
|
||||
"""渲染TTS(文本转语音)设置"""
|
||||
# 获取支持的语音列表
|
||||
support_locales = ["zh-CN", "zh-HK", "zh-TW", "en-US"]
|
||||
support_locales = ["zh-CN"]
|
||||
voices = voice.get_all_azure_voices(filter_locals=support_locales)
|
||||
|
||||
# 创建友好的显示名称
|
||||
|
||||
@ -52,18 +52,34 @@ def render_language_settings(tr):
|
||||
|
||||
def render_proxy_settings(tr):
|
||||
"""渲染代理设置"""
|
||||
proxy_url_http = config.proxy.get("http", "") or os.getenv("VPN_PROXY_URL", "")
|
||||
proxy_url_https = config.proxy.get("https", "") or os.getenv("VPN_PROXY_URL", "")
|
||||
# 获取当前代理状态
|
||||
proxy_enabled = config.proxy.get("enabled", True)
|
||||
proxy_url_http = config.proxy.get("http")
|
||||
proxy_url_https = config.proxy.get("https")
|
||||
|
||||
HTTP_PROXY = st.text_input(tr("HTTP_PROXY"), value=proxy_url_http)
|
||||
HTTPS_PROXY = st.text_input(tr("HTTPs_PROXY"), value=proxy_url_https)
|
||||
# 添加代理开关
|
||||
proxy_enabled = st.checkbox(tr("Enable Proxy"), value=proxy_enabled)
|
||||
|
||||
# 保存代理开关状态
|
||||
config.proxy["enabled"] = proxy_enabled
|
||||
|
||||
if HTTP_PROXY:
|
||||
config.proxy["http"] = HTTP_PROXY
|
||||
os.environ["HTTP_PROXY"] = HTTP_PROXY
|
||||
if HTTPS_PROXY:
|
||||
config.proxy["https"] = HTTPS_PROXY
|
||||
os.environ["HTTPS_PROXY"] = HTTPS_PROXY
|
||||
# 只有在代理启用时才显示代理设置输入框
|
||||
if proxy_enabled:
|
||||
HTTP_PROXY = st.text_input(tr("HTTP_PROXY"), value=proxy_url_http)
|
||||
HTTPS_PROXY = st.text_input(tr("HTTPs_PROXY"), value=proxy_url_https)
|
||||
|
||||
if HTTP_PROXY:
|
||||
config.proxy["http"] = HTTP_PROXY
|
||||
os.environ["HTTP_PROXY"] = HTTP_PROXY
|
||||
if HTTPS_PROXY:
|
||||
config.proxy["https"] = HTTPS_PROXY
|
||||
os.environ["HTTPS_PROXY"] = HTTPS_PROXY
|
||||
else:
|
||||
# 当代理被禁用时,清除环境变量和配置
|
||||
os.environ.pop("HTTP_PROXY", None)
|
||||
os.environ.pop("HTTPS_PROXY", None)
|
||||
config.proxy["http"] = ""
|
||||
config.proxy["https"] = ""
|
||||
|
||||
|
||||
def test_vision_model_connection(api_key, base_url, model_name, provider, tr):
|
||||
@ -90,6 +106,28 @@ def test_vision_model_connection(api_key, base_url, model_name, provider, tr):
|
||||
except Exception as e:
|
||||
return False, f"{tr('gemini model is not available')}: {str(e)}"
|
||||
|
||||
elif provider.lower() == 'qwenvl':
|
||||
from openai import OpenAI
|
||||
try:
|
||||
client = OpenAI(
|
||||
api_key=api_key,
|
||||
base_url=base_url or "https://dashscope.aliyuncs.com/compatible-mode/v1"
|
||||
)
|
||||
|
||||
# 发送一个简单的测试请求
|
||||
response = client.chat.completions.create(
|
||||
model=model_name or "qwen-vl-max-latest",
|
||||
messages=[{"role": "user", "content": "直接回复我文本'当前网络可用'"}]
|
||||
)
|
||||
|
||||
if response and response.choices:
|
||||
return True, tr("QwenVL model is available")
|
||||
else:
|
||||
return False, tr("QwenVL model returned invalid response")
|
||||
|
||||
except Exception as e:
|
||||
return False, f"{tr('QwenVL model is not available')}: {str(e)}"
|
||||
|
||||
elif provider.lower() == 'narratoapi':
|
||||
import requests
|
||||
try:
|
||||
@ -116,7 +154,7 @@ def render_vision_llm_settings(tr):
|
||||
st.subheader(tr("Vision Model Settings"))
|
||||
|
||||
# 视频分析模型提供商选择
|
||||
vision_providers = ['Gemini', 'NarratoAPI(待发布)', 'QwenVL(待发布)']
|
||||
vision_providers = ['Gemini', 'QwenVL', 'NarratoAPI(待发布)']
|
||||
saved_vision_provider = config.app.get("vision_llm_provider", "Gemini").lower()
|
||||
saved_provider_index = 0
|
||||
|
||||
@ -142,18 +180,33 @@ def render_vision_llm_settings(tr):
|
||||
# 渲染视觉模型配置输入框
|
||||
st_vision_api_key = st.text_input(tr("Vision API Key"), value=vision_api_key, type="password")
|
||||
|
||||
# 当选择 Gemini 时禁用 base_url 输入
|
||||
if vision_provider.lower() == 'gemini':
|
||||
# 根据不同提供商设置默认值和帮助信息
|
||||
if vision_provider == 'gemini':
|
||||
st_vision_base_url = st.text_input(
|
||||
tr("Vision Base URL"),
|
||||
value=vision_base_url,
|
||||
disabled=True,
|
||||
help=tr("Gemini API does not require a base URL")
|
||||
)
|
||||
st_vision_model_name = st.text_input(
|
||||
tr("Vision Model Name"),
|
||||
value=vision_model_name or "gemini-1.5-flash",
|
||||
help=tr("Default: gemini-1.5-flash")
|
||||
)
|
||||
elif vision_provider == 'qwenvl':
|
||||
st_vision_base_url = st.text_input(
|
||||
tr("Vision Base URL"),
|
||||
value=vision_base_url or "https://dashscope.aliyuncs.com/compatible-mode/v1",
|
||||
help=tr("Default: https://dashscope.aliyuncs.com/compatible-mode/v1")
|
||||
)
|
||||
st_vision_model_name = st.text_input(
|
||||
tr("Vision Model Name"),
|
||||
value=vision_model_name or "qwen-vl-max-latest",
|
||||
help=tr("Default: qwen-vl-max-latest")
|
||||
)
|
||||
else:
|
||||
st_vision_base_url = st.text_input(tr("Vision Base URL"), value=vision_base_url)
|
||||
|
||||
st_vision_model_name = st.text_input(tr("Vision Model Name"), value=vision_model_name)
|
||||
st_vision_model_name = st.text_input(tr("Vision Model Name"), value=vision_model_name)
|
||||
|
||||
# 在配置输入框后添加测试按钮
|
||||
if st.button(tr("Test Connection"), key="test_vision_connection"):
|
||||
@ -174,7 +227,7 @@ def render_vision_llm_settings(tr):
|
||||
# 保存视觉模型配置
|
||||
if st_vision_api_key:
|
||||
config.app[f"vision_{vision_provider}_api_key"] = st_vision_api_key
|
||||
st.session_state[f"vision_{vision_provider}_api_key"] = st_vision_api_key # 用于script_settings.py
|
||||
st.session_state[f"vision_{vision_provider}_api_key"] = st_vision_api_key
|
||||
if st_vision_base_url:
|
||||
config.app[f"vision_{vision_provider}_base_url"] = st_vision_base_url
|
||||
st.session_state[f"vision_{vision_provider}_base_url"] = st_vision_base_url
|
||||
@ -182,81 +235,6 @@ def render_vision_llm_settings(tr):
|
||||
config.app[f"vision_{vision_provider}_model_name"] = st_vision_model_name
|
||||
st.session_state[f"vision_{vision_provider}_model_name"] = st_vision_model_name
|
||||
|
||||
# # NarratoAPI 特殊配置
|
||||
# if vision_provider == 'narratoapi':
|
||||
# st.subheader(tr("Narrato Additional Settings"))
|
||||
#
|
||||
# # Narrato API 基础配置
|
||||
# narrato_api_key = st.text_input(
|
||||
# tr("Narrato API Key"),
|
||||
# value=config.app.get("narrato_api_key", ""),
|
||||
# type="password",
|
||||
# help="用于访问 Narrato API 的密钥"
|
||||
# )
|
||||
# if narrato_api_key:
|
||||
# config.app["narrato_api_key"] = narrato_api_key
|
||||
# st.session_state['narrato_api_key'] = narrato_api_key
|
||||
#
|
||||
# narrato_api_url = st.text_input(
|
||||
# tr("Narrato API URL"),
|
||||
# value=config.app.get("narrato_api_url", "http://127.0.0.1:8000/api/v1/video/analyze")
|
||||
# )
|
||||
# if narrato_api_url:
|
||||
# config.app["narrato_api_url"] = narrato_api_url
|
||||
# st.session_state['narrato_api_url'] = narrato_api_url
|
||||
#
|
||||
# # 视频分析模型配置
|
||||
# st.markdown("##### " + tr("Vision Model Settings"))
|
||||
# narrato_vision_model = st.text_input(
|
||||
# tr("Vision Model Name"),
|
||||
# value=config.app.get("narrato_vision_model", "gemini-1.5-flash")
|
||||
# )
|
||||
# narrato_vision_key = st.text_input(
|
||||
# tr("Vision Model API Key"),
|
||||
# value=config.app.get("narrato_vision_key", ""),
|
||||
# type="password",
|
||||
# help="用于视频分析的模 API Key"
|
||||
# )
|
||||
#
|
||||
# if narrato_vision_model:
|
||||
# config.app["narrato_vision_model"] = narrato_vision_model
|
||||
# st.session_state['narrato_vision_model'] = narrato_vision_model
|
||||
# if narrato_vision_key:
|
||||
# config.app["narrato_vision_key"] = narrato_vision_key
|
||||
# st.session_state['narrato_vision_key'] = narrato_vision_key
|
||||
#
|
||||
# # 文案生成模型配置
|
||||
# st.markdown("##### " + tr("Text Generation Model Settings"))
|
||||
# narrato_llm_model = st.text_input(
|
||||
# tr("LLM Model Name"),
|
||||
# value=config.app.get("narrato_llm_model", "qwen-plus")
|
||||
# )
|
||||
# narrato_llm_key = st.text_input(
|
||||
# tr("LLM Model API Key"),
|
||||
# value=config.app.get("narrato_llm_key", ""),
|
||||
# type="password",
|
||||
# help="用于文案生成的模型 API Key"
|
||||
# )
|
||||
#
|
||||
# if narrato_llm_model:
|
||||
# config.app["narrato_llm_model"] = narrato_llm_model
|
||||
# st.session_state['narrato_llm_model'] = narrato_llm_model
|
||||
# if narrato_llm_key:
|
||||
# config.app["narrato_llm_key"] = narrato_llm_key
|
||||
# st.session_state['narrato_llm_key'] = narrato_llm_key
|
||||
#
|
||||
# # 批处理配置
|
||||
# narrato_batch_size = st.number_input(
|
||||
# tr("Batch Size"),
|
||||
# min_value=1,
|
||||
# max_value=50,
|
||||
# value=config.app.get("narrato_batch_size", 10),
|
||||
# help="每批处理的图片数量"
|
||||
# )
|
||||
# if narrato_batch_size:
|
||||
# config.app["narrato_batch_size"] = narrato_batch_size
|
||||
# st.session_state['narrato_batch_size'] = narrato_batch_size
|
||||
|
||||
|
||||
def test_text_model_connection(api_key, base_url, model_name, provider, tr):
|
||||
"""测试文本模型连接
|
||||
@ -328,6 +306,7 @@ def test_text_model_connection(api_key, base_url, model_name, provider, tr):
|
||||
except Exception as e:
|
||||
return False, f"{tr('Connection failed')}: {str(e)}"
|
||||
|
||||
|
||||
def render_text_llm_settings(tr):
|
||||
"""渲染文案生成模型设置"""
|
||||
st.subheader(tr("Text Generation Model Settings"))
|
||||
|
||||
303
webui/components/merge_settings.py
Normal file
303
webui/components/merge_settings.py
Normal file
@ -0,0 +1,303 @@
|
||||
import os
|
||||
import time
|
||||
import math
|
||||
import sys
|
||||
import tempfile
|
||||
import traceback
|
||||
import shutil
|
||||
|
||||
import streamlit as st
|
||||
from loguru import logger
|
||||
from typing import List, Dict, Tuple
|
||||
from dataclasses import dataclass
|
||||
from streamlit.runtime.uploaded_file_manager import UploadedFile
|
||||
|
||||
from webui.utils.merge_video import merge_videos_and_subtitles
|
||||
from app.utils.utils import video_dir, srt_dir
|
||||
from app.services.subtitle import extract_audio_and_create_subtitle
|
||||
|
||||
# Path of the scratch directory used while merging (relative to the working directory)
TEMP_MERGE_DIR = os.path.join("storage", "temp", "merge")

# Make sure the scratch directory exists; this runs when the module is imported
os.makedirs(TEMP_MERGE_DIR, exist_ok=True)
|
||||
|
||||
|
||||
@dataclass
class VideoSubtitlePair:
    """A video upload and the subtitle file that shares its base name."""

    # Uploaded video object, or None while no video has been attached
    video_file: UploadedFile | None
    # Path of the subtitle file on disk, or None when there is no subtitle
    subtitle_file: str | None
    # File name without its extension; the key used to match video and subtitle
    base_name: str
    # Sort position of this pair (defaults to 0)
    order: int = 0
|
||||
|
||||
|
||||
def save_uploaded_file(uploaded_file: "UploadedFile", target_dir: str) -> str:
    """Save an uploaded file into ``target_dir`` and return the written path.

    Args:
        uploaded_file: Object exposing ``name`` (the client-supplied file name)
            and ``getvalue()`` (the file content as bytes).
        target_dir: Directory to write into; created if it does not exist.

    Returns:
        The path of the file that was written.

    Raises:
        ValueError: If the upload's name contains no usable file name.
    """
    # The name comes from the client: keep only its last path component so a
    # name such as "../../x.srt" cannot be written outside target_dir.
    file_name = os.path.basename(uploaded_file.name.replace("\\", "/"))
    if not file_name:
        raise ValueError(f"Invalid upload file name: {uploaded_file.name!r}")

    os.makedirs(target_dir, exist_ok=True)
    file_path = os.path.join(target_dir, file_name)

    # Remove a previous upload with the same name before writing the new one
    if os.path.exists(file_path):
        os.remove(file_path)
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getvalue())
    return file_path
|
||||
|
||||
|
||||
def clean_temp_dir():
    """Delete every regular file directly inside the temporary merge directory.

    Sub-directories are left alone. A failure on one file is logged and the
    remaining files are still processed.
    """
    if not os.path.exists(TEMP_MERGE_DIR):
        return

    for entry in os.listdir(TEMP_MERGE_DIR):
        candidate = os.path.join(TEMP_MERGE_DIR, entry)
        try:
            if os.path.isfile(candidate):
                os.unlink(candidate)
        except Exception as e:
            logger.error(f"清理临时文件失败: {str(e)}")
|
||||
|
||||
|
||||
def group_files(files: List[UploadedFile]) -> Dict[str, VideoSubtitlePair]:
    """Group uploaded files into video/subtitle pairs keyed by base name.

    Every ``.mp4`` and ``.srt`` upload is written to ``TEMP_MERGE_DIR``.
    A pair is created for each distinct video base name, numbered in upload
    order. A subtitle is attached only when a video with the same base name
    was uploaded; subtitles without a video are saved but not returned.

    Args:
        files: The uploaded files (videos and subtitles, in any order).

    Returns:
        Mapping of base name to its ``VideoSubtitlePair``.
    """
    pairs = {}
    # Split each name once; both passes below reuse the result
    split_names = [os.path.splitext(file.name) for file in files]

    # Pass 1: register every video first so subtitles can be matched
    # regardless of the order in which the files were uploaded.
    for file, (base_name, ext) in zip(files, split_names):
        if ext.lower() != ".mp4":
            continue
        if base_name not in pairs:
            # Pairs are only ever added here, so len(pairs) is the next order value
            pairs[base_name] = VideoSubtitlePair(None, None, base_name, len(pairs))
        pairs[base_name].video_file = file
        save_uploaded_file(file, TEMP_MERGE_DIR)

    # Pass 2: save every subtitle, attaching it when a matching video exists
    for file, (base_name, ext) in zip(files, split_names):
        if ext.lower() != ".srt":
            continue
        # Record the path that was actually written rather than rebuilding it
        subtitle_path = save_uploaded_file(file, TEMP_MERGE_DIR)
        if base_name in pairs:
            pairs[base_name].subtitle_file = subtitle_path

    return pairs
|
||||
|
||||
|
||||
def render_merge_settings(tr):
    """Render the "merge videos and subtitles" expander.

    The panel lets the user upload ``.mp4`` / ``.srt`` files, previews them in
    a five-column grid, offers transcription for videos that have no subtitle,
    lets the user set the order of each entry, and merges every complete
    video+subtitle pair into one video and one subtitle file.

    Args:
        tr: Callable that maps an English UI key to the localized string.
            NOTE(review): assumed to look keys up by exact string match, which
            is why keys passed to it must not carry extra whitespace - confirm.
    """
    with st.expander(tr("Video Subtitle Merge"), expanded=False):
        # Upload area
        uploaded_files = st.file_uploader(
            tr("Upload Video and Subtitle Files"),
            type=["mp4", "srt"],
            accept_multiple_files=True,
            key="merge_files"
        )

        if uploaded_files:
            all_pairs = group_files(uploaded_files)

            if all_pairs:
                st.write(tr("All Uploaded Files"))

                # Create the ordering info in session state on first use
                if 'file_orders' not in st.session_state:
                    st.session_state.file_orders = {
                        name: pair.order for name, pair in all_pairs.items()
                    }
                    st.session_state.needs_reorder = False

                # Make sure newly uploaded files have an order value
                for name, pair in all_pairs.items():
                    if name not in st.session_state.file_orders:
                        st.session_state.file_orders[name] = pair.order

                # Drop order values of files that are no longer uploaded
                st.session_state.file_orders = {
                    k: v for k, v in st.session_state.file_orders.items()
                    if k in all_pairs
                }

                # Sort the pairs by their order value
                sorted_pairs = sorted(
                    all_pairs.items(),
                    key=lambda x: st.session_state.file_orders[x[0]]
                )

                # Number of grid rows needed at five entries per row (ceiling division)
                num_pairs = len(sorted_pairs)
                num_rows = (num_pairs + 4) // 5

                for row in range(num_rows):
                    cols = st.columns(5)

                    # Fill this row with up to five entries
                    for col_idx in range(5):
                        pair_idx = row * 5 + col_idx
                        if pair_idx < num_pairs:
                            base_name, pair = sorted_pairs[pair_idx]
                            with cols[col_idx]:
                                st.caption(base_name)

                                # Video preview, if the video file exists
                                video_path = os.path.join(TEMP_MERGE_DIR, f"{base_name}.mp4")
                                if os.path.exists(video_path):
                                    st.video(video_path)
                                else:
                                    st.warning(tr("Missing Video"))

                                # Subtitle preview, if the subtitle file exists
                                subtitle_path = os.path.join(TEMP_MERGE_DIR, f"{base_name}.srt")
                                if os.path.exists(subtitle_path):
                                    with open(subtitle_path, 'r', encoding='utf-8') as f:
                                        subtitle_content = f.read()
                                    st.markdown(tr("Subtitle Preview"))
                                    st.text_area(
                                        "Subtitle Content",
                                        value=subtitle_content,
                                        height=100,  # kept short to fit the five-column layout
                                        label_visibility="collapsed",
                                        key=f"subtitle_preview_{base_name}"
                                    )
                                else:
                                    st.warning(tr("Missing Subtitle"))
                                    # A video without a subtitle gets a one-click transcribe button
                                    if os.path.exists(video_path):
                                        if st.button(tr("One-Click Transcribe"), key=f"transcribe_{base_name}"):
                                            with st.spinner(tr("Transcribing...")):
                                                try:
                                                    # Generate the subtitle file from the video
                                                    result = extract_audio_and_create_subtitle(video_path, subtitle_path)
                                                    if result:
                                                        # Read the generated subtitle back and preview it
                                                        with open(subtitle_path, 'r', encoding='utf-8') as f:
                                                            subtitle_content = f.read()
                                                        st.markdown(tr("Subtitle Preview"))
                                                        st.text_area(
                                                            "Subtitle Content",
                                                            value=subtitle_content,
                                                            height=150,
                                                            label_visibility="collapsed",
                                                            key=f"subtitle_preview_transcribed_{base_name}"
                                                        )
                                                        st.success(tr("Transcription Complete!"))
                                                        # Record the subtitle path on the pair
                                                        pair.subtitle_file = subtitle_path
                                                    else:
                                                        st.error(tr("Transcription Failed. Please try again."))
                                                except Exception as e:
                                                    error_message = str(e)
                                                    logger.error(traceback.format_exc())
                                                    if "rate limit exceeded" in error_message.lower():
                                                        st.error(tr("API rate limit exceeded. Please wait about an hour and try again."))
                                                    elif "resource_exhausted" in error_message.lower():
                                                        st.error(tr("Resources exhausted. Please try again later."))
                                                    else:
                                                        st.error(f"{tr('Transcription Failed')}: {str(e)}")

                                # Order input
                                order = st.number_input(
                                    tr("Order"),
                                    min_value=0,
                                    value=st.session_state.file_orders[base_name],
                                    key=f"order_{base_name}",
                                    on_change=lambda: setattr(st.session_state, 'needs_reorder', True)
                                )
                                if order != st.session_state.file_orders[base_name]:
                                    st.session_state.file_orders[base_name] = order
                                    st.session_state.needs_reorder = True

                # Rerun the page when an order value changed so the grid is re-sorted
                if st.session_state.needs_reorder:
                    st.session_state.needs_reorder = False
                    st.rerun()

                # Pairs for which both the video and the subtitle exist on disk
                complete_pairs = {
                    k: v for k, v in all_pairs.items()
                    if os.path.exists(os.path.join(TEMP_MERGE_DIR, f"{k}.mp4")) and
                    os.path.exists(os.path.join(TEMP_MERGE_DIR, f"{k}.srt"))
                }

                # Merge button and result display
                cols = st.columns([1, 2, 1])
                with cols[0]:
                    st.write(f"{tr('Mergeable Files')}: {len(complete_pairs)}")

                merge_videos_result = None

                with cols[1]:
                    if st.button(tr("Merge All Files"), type="primary", use_container_width=True):
                        try:
                            # Complete pairs in the user-chosen order
                            sorted_complete_pairs = sorted(
                                [(k, v) for k, v in complete_pairs.items()],
                                key=lambda x: st.session_state.file_orders[x[0]]
                            )

                            video_paths = []
                            subtitle_paths = []
                            for base_name, _ in sorted_complete_pairs:
                                video_paths.append(os.path.join(TEMP_MERGE_DIR, f"{base_name}.mp4"))
                                subtitle_paths.append(os.path.join(TEMP_MERGE_DIR, f"{base_name}.srt"))

                            # Output paths; the stamp is computed once so the video
                            # and subtitle names always carry the same suffix
                            stamp = time.strftime('%M%S')
                            output_video = os.path.join(video_dir(), f"merged_video_{stamp}.mp4")
                            output_subtitle = os.path.join(srt_dir(), f"merged_subtitle_{stamp}.srt")

                            with st.spinner(tr("Merging files...")):
                                # Merge the files
                                merge_videos_and_subtitles(
                                    video_paths,
                                    subtitle_paths,
                                    output_video,
                                    output_subtitle
                                )

                            success = True
                            error_msg = ""

                            # Check that both output files were produced. The
                            # separating space is added outside tr() so the key
                            # matches the locale entry exactly.
                            if not os.path.exists(output_video):
                                success = False
                                error_msg += tr("Failed to generate merged video.") + " "
                            if not os.path.exists(output_subtitle):
                                success = False
                                error_msg += tr("Failed to generate merged subtitle.") + " "

                            if success:
                                st.success(tr("Merge completed!"))
                                merge_videos_result = (output_video, output_subtitle)
                                # The staged uploads are no longer needed
                                clean_temp_dir()
                            else:
                                st.error(error_msg)

                        except Exception as e:
                            error_message = str(e)
                            if "moviepy" in error_message.lower():
                                st.error(tr("Error processing video files. Please check if the videos are valid MP4 files."))
                            elif "pysrt" in error_message.lower():
                                st.error(tr("Error processing subtitle files. Please check if the subtitles are valid SRT files."))
                            else:
                                st.error(f"{tr('Error during merge')}: {error_message}")

                # The merge result preview is shown below the merge button
                if merge_videos_result:
                    st.markdown(f"<h3 style='text-align: center'>{tr('Merge Result Preview')}</h3>", unsafe_allow_html=True)
                    # Column layout used to centre the video
                    col1, col2, col3 = st.columns([1,2,1])
                    with col2:
                        st.video(merge_videos_result[0])
                        st.code(f"{tr('Video Path')}: {merge_videos_result[0]}")
                        st.code(f"{tr('Subtitle Path')}: {merge_videos_result[1]}")
            else:
                st.warning(tr("No Files Found"))
|
||||
@ -1,86 +1,15 @@
|
||||
import os
|
||||
import ssl
|
||||
import glob
|
||||
import json
|
||||
import time
|
||||
import asyncio
|
||||
import traceback
|
||||
from requests.adapters import HTTPAdapter
|
||||
from urllib3.util.retry import Retry
|
||||
import requests
|
||||
import streamlit as st
|
||||
from loguru import logger
|
||||
|
||||
from app.config import config
|
||||
from app.models.schema import VideoClipParams
|
||||
from app.utils.script_generator import ScriptProcessor
|
||||
from app.utils import utils, check_script, vision_analyzer, video_processor, video_processor_v2
|
||||
from webui.utils import file_utils
|
||||
from app.utils import utils, check_script
|
||||
from webui.tools.generate_script_docu import generate_script_docu
|
||||
|
||||
|
||||
def get_batch_timestamps(batch_files, prev_batch_files=None):
    """Return the timestamp range covered by one batch of keyframe files.

    File names look like ``keyframe_001253_000050.jpg``; the third
    underscore-separated field carries the time, e.g. ``000050`` for 00:00:50
    and ``000101`` for 00:01:01. Only the last four digits (minutes and
    seconds) are used when formatting.

    Args:
        batch_files: File list of the current batch.
        prev_batch_files: File list of the previous batch. When the current
            batch holds a single image, the last file of the previous batch
            supplies the start of the range.

    Returns:
        Tuple ``(first_timestamp, last_timestamp, timestamp_range)`` where the
        timestamps are ``MM:SS`` strings and the range is ``"MM:SS-MM:SS"``.
    """
    if not batch_files:
        logger.warning("Empty batch files")
        return "00:00", "00:00", "00:00-00:00"

    def format_timestamp(time_str):
        # The last four characters are read as MMSS, e.g. 0050 -> 00:50, 0101 -> 01:01
        if len(time_str) < 4:
            logger.warning(f"Invalid timestamp format: {time_str}")
            return "00:00"

        minutes = int(time_str[-4:-2])
        seconds = int(time_str[-2:])

        # Carry any overflowing seconds into the minutes
        if seconds >= 60:
            minutes += seconds // 60
            seconds = seconds % 60

        return f"{minutes:02d}:{seconds:02d}"

    def time_field(frame_name):
        # Third "_"-separated field with the ".jpg" suffix removed
        return frame_name.split('_')[2].replace('.jpg', '')

    borrow_previous = len(batch_files) == 1 and prev_batch_files and len(prev_batch_files) > 0
    if borrow_previous:
        # Single-image batch: start the range at the previous batch's last frame
        first_frame = os.path.basename(prev_batch_files[-1])
        last_frame = os.path.basename(batch_files[0])
        logger.debug(f"单张图片批次,使用上一批次最后一帧作为首帧: {first_frame}")
    else:
        first_frame = os.path.basename(batch_files[0])
        last_frame = os.path.basename(batch_files[-1])

    first_time = time_field(first_frame)
    last_time = time_field(last_frame)

    first_timestamp = format_timestamp(first_time)
    last_timestamp = format_timestamp(last_time)
    timestamp_range = f"{first_timestamp}-{last_timestamp}"

    logger.debug(f"解析时间戳: {first_frame} -> {first_timestamp}, {last_frame} -> {last_timestamp}")
    return first_timestamp, last_timestamp, timestamp_range
|
||||
|
||||
def get_batch_files(keyframe_files, result, batch_size=5):
    """Return the keyframe files that belong to the batch described by ``result``.

    Args:
        keyframe_files: Full list of keyframe files.
        result: Mapping with a ``'batch_index'`` entry giving the batch number.
        batch_size: Number of files per batch.

    Returns:
        The slice of ``keyframe_files`` for that batch (shorter for the last
        batch, empty when the index is past the end).
    """
    first = result['batch_index'] * batch_size
    # Slicing already stops at the end of the list, so no explicit clamp is needed
    return keyframe_files[first:first + batch_size]
|
||||
|
||||
def render_script_panel(tr):
|
||||
"""渲染脚本配置面板"""
|
||||
with st.container(border=True):
|
||||
@ -102,7 +31,11 @@ def render_script_panel(tr):
|
||||
|
||||
def render_script_file(tr, params):
|
||||
"""渲染脚本文件选择"""
|
||||
script_list = [(tr("None"), ""), (tr("Auto Generate"), "auto")]
|
||||
script_list = [
|
||||
(tr("None"), ""),
|
||||
(tr("Auto Generate"), "auto"),
|
||||
(tr("Upload Script"), "upload_script") # 新增上传脚本选项
|
||||
]
|
||||
|
||||
# 获取已有脚本文件
|
||||
suffix = "*.json"
|
||||
@ -132,7 +65,7 @@ def render_script_file(tr, params):
|
||||
|
||||
selected_script_index = st.selectbox(
|
||||
tr("Script Files"),
|
||||
index=selected_index, # 使用找到的索引
|
||||
index=selected_index,
|
||||
options=range(len(script_list)),
|
||||
format_func=lambda x: script_list[x][0]
|
||||
)
|
||||
@ -141,10 +74,50 @@ def render_script_file(tr, params):
|
||||
st.session_state['video_clip_json_path'] = script_path
|
||||
params.video_clip_json_path = script_path
|
||||
|
||||
# 处理脚本上传
|
||||
if script_path == "upload_script":
|
||||
uploaded_file = st.file_uploader(
|
||||
tr("Upload Script File"),
|
||||
type=["json"],
|
||||
accept_multiple_files=False,
|
||||
)
|
||||
|
||||
if uploaded_file is not None:
|
||||
try:
|
||||
# 读取上传的JSON内容并验证格式
|
||||
script_content = uploaded_file.read().decode('utf-8')
|
||||
json_data = json.loads(script_content)
|
||||
|
||||
# 保存到脚本目录
|
||||
script_file_path = os.path.join(script_dir, uploaded_file.name)
|
||||
file_name, file_extension = os.path.splitext(uploaded_file.name)
|
||||
|
||||
# 如果文件已存在,添加时间戳
|
||||
if os.path.exists(script_file_path):
|
||||
timestamp = time.strftime("%Y%m%d%H%M%S")
|
||||
file_name_with_timestamp = f"{file_name}_{timestamp}"
|
||||
script_file_path = os.path.join(script_dir, file_name_with_timestamp + file_extension)
|
||||
|
||||
# 写入文件
|
||||
with open(script_file_path, "w", encoding='utf-8') as f:
|
||||
json.dump(json_data, f, ensure_ascii=False, indent=2)
|
||||
|
||||
# 更新状态
|
||||
st.success(tr("Script Uploaded Successfully"))
|
||||
st.session_state['video_clip_json_path'] = script_file_path
|
||||
params.video_clip_json_path = script_file_path
|
||||
time.sleep(1)
|
||||
st.rerun()
|
||||
|
||||
except json.JSONDecodeError:
|
||||
st.error(tr("Invalid JSON format"))
|
||||
except Exception as e:
|
||||
st.error(f"{tr('Upload failed')}: {str(e)}")
|
||||
|
||||
|
||||
def render_video_file(tr, params):
|
||||
"""渲染视频文件选择"""
|
||||
video_list = [(tr("None"), ""), (tr("Upload Local Files"), "local")]
|
||||
video_list = [(tr("None"), ""), (tr("Upload Local Files"), "upload_local")]
|
||||
|
||||
# 获取已有视频文件
|
||||
for suffix in ["*.mp4", "*.mov", "*.avi", "*.mkv"]:
|
||||
@ -164,7 +137,7 @@ def render_video_file(tr, params):
|
||||
st.session_state['video_origin_path'] = video_path
|
||||
params.video_origin_path = video_path
|
||||
|
||||
if video_path == "local":
|
||||
if video_path == "upload_local":
|
||||
uploaded_file = st.file_uploader(
|
||||
tr("Upload Local Files"),
|
||||
type=["mp4", "mov", "avi", "flv", "mkv"],
|
||||
@ -250,7 +223,7 @@ def render_script_buttons(tr, params):
|
||||
|
||||
if st.button(button_name, key="script_action", disabled=not script_path):
|
||||
if script_path == "auto":
|
||||
generate_script(tr, params)
|
||||
generate_script_docu(tr, params)
|
||||
else:
|
||||
load_script(tr, script_path)
|
||||
|
||||
@ -305,379 +278,6 @@ def load_script(tr, script_path):
|
||||
st.error(f"{tr('Failed to load script')}: {str(e)}")
|
||||
|
||||
|
||||
def generate_script(tr, params):
    """Generate a video script from the selected video and store it in session state.

    Steps: extract keyframes (reusing a cached directory keyed by the video
    path and modification time), analyse them with the configured vision
    provider ('gemini' or 'narratoapi'), and store the resulting script in
    ``st.session_state['video_clip_json']``. Progress and errors are reported
    through Streamlit widgets; errors are also logged.

    Args:
        tr: Translation callable (not used inside this function).
        params: Object whose ``video_origin_path`` attribute is the source video.
    """
    progress_bar = st.progress(0)
    status_text = st.empty()

    def update_progress(progress: float, message: str = ""):
        progress_bar.progress(progress)
        if message:
            status_text.text(f"{progress}% - {message}")
        else:
            status_text.text(f"进度: {progress}%")

    try:
        with st.spinner("正在生成脚本..."):
            if not params.video_origin_path:
                st.error("请先选择视频文件")
                return

            # Stays None when no provider branch produces a script, so the
            # check further down reports the failure instead of raising NameError.
            script = None

            # =================== Extract keyframes ===================
            update_progress(10, "正在提取关键帧...")

            # Directory that holds the keyframes of this particular video
            keyframes_dir = os.path.join(utils.temp_dir(), "keyframes")
            video_hash = utils.md5(params.video_origin_path + str(os.path.getmtime(params.video_origin_path)))
            video_keyframes_dir = os.path.join(keyframes_dir, video_hash)

            # Check whether keyframes were already extracted
            keyframe_files = []
            if os.path.exists(video_keyframes_dir):
                # Collect the existing keyframe files
                for filename in sorted(os.listdir(video_keyframes_dir)):
                    if filename.endswith('.jpg'):
                        keyframe_files.append(os.path.join(video_keyframes_dir, filename))

                if keyframe_files:
                    logger.info(f"使用已缓存的关键帧: {video_keyframes_dir}")
                    st.info(f"使用已缓存的关键帧,如需重新提取请删除目录: {video_keyframes_dir}")
                    update_progress(20, f"使用已缓存关键帧,共 {len(keyframe_files)} 帧")

            # No cached keyframes: extract them now
            if not keyframe_files:
                try:
                    # Make sure the directory exists
                    os.makedirs(video_keyframes_dir, exist_ok=True)

                    # Initialise the video processor
                    if config.frames.get("version") == "v2":
                        processor = video_processor_v2.VideoProcessor(params.video_origin_path)
                        # Process the video and extract keyframes
                        processor.process_video_pipeline(
                            output_dir=video_keyframes_dir,
                            skip_seconds=st.session_state.get('skip_seconds'),
                            threshold=st.session_state.get('threshold')
                        )
                    else:
                        processor = video_processor.VideoProcessor(params.video_origin_path)
                        # Process the video and extract keyframes
                        processor.process_video(
                            output_dir=video_keyframes_dir,
                            skip_seconds=0
                        )

                    # Collect all keyframe file paths
                    for filename in sorted(os.listdir(video_keyframes_dir)):
                        if filename.endswith('.jpg'):
                            keyframe_files.append(os.path.join(video_keyframes_dir, filename))

                    if not keyframe_files:
                        raise Exception("未提取到任何关键帧")

                    update_progress(20, f"关键帧提取完成,共 {len(keyframe_files)} 帧")

                except Exception as e:
                    # Extraction failed: remove the directory so a later run
                    # does not mistake it for a valid cache
                    try:
                        if os.path.exists(video_keyframes_dir):
                            import shutil
                            shutil.rmtree(video_keyframes_dir)
                    except Exception as cleanup_err:
                        logger.error(f"清理失败的关键帧目录时出错: {cleanup_err}")

                    raise Exception(f"关键帧提取失败: {str(e)}")

            # Branch on the configured vision LLM provider
            vision_llm_provider = st.session_state.get('vision_llm_providers').lower()
            logger.debug(f"Vision LLM 提供商: {vision_llm_provider}")

            if vision_llm_provider == 'gemini':
                try:
                    # =================== Initialise the vision analyser ===================
                    update_progress(30, "正在初始化视觉分析器...")

                    # Gemini settings come from session state
                    vision_api_key = st.session_state.get('vision_gemini_api_key')
                    vision_model = st.session_state.get('vision_gemini_model_name')
                    vision_base_url = st.session_state.get('vision_gemini_base_url')

                    if not vision_api_key or not vision_model:
                        raise ValueError("未配置 Gemini API Key 或者 模型,请在基础设置中配置")

                    analyzer = vision_analyzer.VisionAnalyzer(
                        model_name=vision_model,
                        api_key=vision_api_key,
                    )

                    update_progress(40, "正在分析关键帧...")

                    # =================== Create an asyncio event loop ===================
                    loop = asyncio.new_event_loop()
                    asyncio.set_event_loop(loop)

                    # Run the asynchronous analysis; the loop is closed even on failure
                    vision_batch_size = st.session_state.get('vision_batch_size') or config.frames.get("vision_batch_size")
                    try:
                        results = loop.run_until_complete(
                            analyzer.analyze_images(
                                images=keyframe_files,
                                prompt=config.app.get('vision_analysis_prompt'),
                                batch_size=vision_batch_size
                            )
                        )
                    finally:
                        loop.close()

                    # =================== Process the analysis results ===================
                    update_progress(60, "正在整理分析结果...")

                    # Concatenate the analysis of every batch
                    frame_analysis = ""
                    prev_batch_files = None

                    for result in results:
                        if 'error' in result:
                            logger.warning(f"批次 {result['batch_index']} 处理出现警告: {result['error']}")
                            continue
                        # Files of the current batch
                        batch_files = get_batch_files(keyframe_files, result, vision_batch_size)
                        logger.debug(f"批次 {result['batch_index']} 处理完成,共 {len(batch_files)} 张图片")
                        logger.debug(batch_files)

                        first_timestamp, last_timestamp, _ = get_batch_timestamps(batch_files, prev_batch_files)
                        logger.debug(f"处理时间戳: {first_timestamp}-{last_timestamp}")

                        # Append the analysis with its timestamp header
                        frame_analysis += f"\n=== {first_timestamp}-{last_timestamp} ===\n"
                        frame_analysis += result['response']
                        frame_analysis += "\n"

                        # Remember this batch for the next iteration
                        prev_batch_files = batch_files

                    if not frame_analysis.strip():
                        raise Exception("未能生成有效的帧分析结果")

                    # Save the analysis
                    analysis_path = os.path.join(utils.temp_dir(), "frame_analysis.txt")
                    with open(analysis_path, 'w', encoding='utf-8') as f:
                        f.write(frame_analysis)

                    update_progress(70, "正在生成脚本...")

                    # Text-generation settings come from the config
                    text_provider = config.app.get('text_llm_provider', 'gemini').lower()
                    text_api_key = config.app.get(f'text_{text_provider}_api_key')
                    text_model = config.app.get(f'text_{text_provider}_model_name')
                    text_base_url = config.app.get(f'text_{text_provider}_base_url')

                    # Build the list of frame contents
                    frame_content_list = []
                    prev_batch_files = None

                    for result in results:
                        if 'error' in result:
                            continue

                        batch_files = get_batch_files(keyframe_files, result, vision_batch_size)
                        _, _, timestamp_range = get_batch_timestamps(batch_files, prev_batch_files)

                        frame_content = {
                            "timestamp": timestamp_range,
                            "picture": result['response'],
                            "narration": "",
                            "OST": 2
                        }
                        frame_content_list.append(frame_content)

                        logger.debug(f"添加帧内容: 时间范围={timestamp_range}, 分析结果长度={len(result['response'])}")

                        # Remember this batch for the next iteration
                        prev_batch_files = batch_files

                    if not frame_content_list:
                        raise Exception("没有有效的帧内容可以处理")

                    # =================== Generate the narration ===================
                    update_progress(80, "正在生成文案...")
                    # Post the configuration for validation
                    api_params = {
                        "vision_api_key": vision_api_key,
                        "vision_model_name": vision_model,
                        "vision_base_url": vision_base_url or "",
                        "text_api_key": text_api_key,
                        "text_model_name": text_model,
                        "text_base_url": text_base_url or ""
                    }
                    headers = {
                        'accept': 'application/json',
                        'Content-Type': 'application/json'
                    }
                    session = requests.Session()
                    retry_strategy = Retry(
                        total=3,
                        backoff_factor=1,
                        status_forcelist=[500, 502, 503, 504]
                    )
                    adapter = HTTPAdapter(max_retries=retry_strategy)
                    session.mount("https://", adapter)
                    try:
                        response = session.post(
                            f"{config.app.get('narrato_api_url')}/video/config",
                            headers=headers,
                            json=api_params,
                            timeout=30,
                            verify=True
                        )
                    except Exception as e:
                        # Best effort: script generation continues, but the
                        # failure is logged instead of being dropped silently
                        logger.warning(f"Posting config to narrato_api_url failed: {e}")
                    custom_prompt = st.session_state.get('custom_prompt', '')
                    processor = ScriptProcessor(
                        model_name=text_model,
                        api_key=text_api_key,
                        prompt=custom_prompt,
                        base_url=text_base_url or "",
                        video_theme=st.session_state.get('video_theme', '')
                    )

                    # Turn the frame contents into a script
                    script_result = processor.process_frames(frame_content_list)

                    # Serialise the result to a JSON string
                    script = json.dumps(script_result, ensure_ascii=False, indent=2)

                except Exception as e:
                    logger.exception(f"大模型处理过程中发生错误\n{traceback.format_exc()}")
                    raise Exception(f"分析失败: {str(e)}")

            elif vision_llm_provider == 'narratoapi':  # NarratoAPI
                try:
                    # Create the temporary directory
                    temp_dir = utils.temp_dir("narrato")

                    # Pack the keyframes
                    update_progress(30, "正在打包关键帧...")
                    zip_path = os.path.join(temp_dir, f"keyframes_{int(time.time())}.zip")
                    if not file_utils.create_zip(keyframe_files, zip_path):
                        raise Exception("打包关键帧失败")

                    # API settings
                    api_url = st.session_state.get('narrato_api_url')
                    api_key = st.session_state.get('narrato_api_key')

                    if not api_key:
                        raise ValueError("未配置 Narrato API Key,请在基础设置中配置")

                    # Prepare the API request
                    headers = {
                        'X-API-Key': api_key,
                        'accept': 'application/json'
                    }

                    api_params = {
                        'batch_size': st.session_state.get('narrato_batch_size', 10),
                        'use_ai': False,
                        'start_offset': 0,
                        'vision_model': st.session_state.get('narrato_vision_model', 'gemini-1.5-flash'),
                        'vision_api_key': st.session_state.get('narrato_vision_key'),
                        'llm_model': st.session_state.get('narrato_llm_model', 'qwen-plus'),
                        'llm_api_key': st.session_state.get('narrato_llm_key'),
                        'custom_prompt': st.session_state.get('custom_prompt', '')
                    }

                    # Send the API request
                    logger.info(f"请求NarratoAPI: {api_url}")
                    update_progress(40, "正在上传文件...")
                    with open(zip_path, 'rb') as f:
                        files = {'file': (os.path.basename(zip_path), f, 'application/x-zip-compressed')}
                        try:
                            response = requests.post(
                                f"{api_url}/video/analyze",
                                headers=headers,
                                params=api_params,
                                files=files,
                                timeout=30  # request timeout in seconds
                            )
                            response.raise_for_status()
                        except requests.RequestException as e:
                            logger.error(f"Narrato API 请求失败:\n{traceback.format_exc()}")
                            raise Exception(f"API请求失败: {str(e)}")

                    task_data = response.json()
                    task_id = task_data["data"].get('task_id')
                    if not task_id:
                        raise Exception(f"无效的API响应: {response.text}")

                    # Poll the task status
                    update_progress(50, "正在等待分析结果...")
                    retry_count = 0
                    max_retries = 60  # 60 polls with a 2 second pause: about two minutes

                    while retry_count < max_retries:
                        try:
                            status_response = requests.get(
                                f"{api_url}/video/tasks/{task_id}",
                                headers=headers,
                                timeout=10
                            )
                            status_response.raise_for_status()
                            task_status = status_response.json()['data']

                            if task_status['status'] == 'SUCCESS':
                                script = task_status['result']['data']
                                break
                            elif task_status['status'] in ['FAILURE', 'RETRY']:
                                raise Exception(f"任务失败: {task_status.get('error')}")

                            retry_count += 1
                            time.sleep(2)

                        except requests.RequestException as e:
                            logger.warning(f"获取任务状态失败,重试中: {str(e)}")
                            retry_count += 1
                            time.sleep(2)
                            continue

                    if retry_count >= max_retries:
                        raise Exception("任务执行超时")

                except Exception as e:
                    logger.exception(f"NarratoAPI 处理过程中发生错误\n{traceback.format_exc()}")
                    raise Exception(f"NarratoAPI 处理失败: {str(e)}")
                finally:
                    # Remove the temporary archive
                    try:
                        if os.path.exists(zip_path):
                            os.remove(zip_path)
                    except Exception as e:
                        logger.warning(f"清理临时文件失败: {str(e)}")

            else:
                logger.exception("Vision Model 未启用,请检查配置")

            if script is None:
                st.error("生成脚本失败,请检查日志")
                st.stop()
            logger.info(f"脚本生成完成")
            if isinstance(script, list):
                st.session_state['video_clip_json'] = script
            elif isinstance(script, str):
                st.session_state['video_clip_json'] = json.loads(script)
            update_progress(80, "脚本生成完成")

        time.sleep(0.1)
        progress_bar.progress(100)
        status_text.text("脚本生成完成!")
        st.success("视频脚本生成成功!")

    except Exception as err:
        st.error(f"生成过程中发生错误: {str(err)}")
        logger.exception(f"生成脚本时发生错误\n{traceback.format_exc()}")
    finally:
        # Leave the final status visible briefly, then remove the widgets
        time.sleep(2)
        progress_bar.empty()
        status_text.empty()
|
||||
|
||||
|
||||
def save_script(tr, video_clip_json_details):
|
||||
"""保存视频脚本"""
|
||||
if not video_clip_json_details:
|
||||
@ -724,7 +324,7 @@ def crop_video(tr, params):
|
||||
utils.cut_video(params, update_progress)
|
||||
time.sleep(0.5)
|
||||
progress_bar.progress(100)
|
||||
status_text.text("剪辑完成!")
|
||||
status_text.text("剪完成!")
|
||||
st.success("视频剪辑成功完成!")
|
||||
except Exception as e:
|
||||
st.error(f"剪辑过程中发生错误: {str(e)}")
|
||||
@ -732,14 +332,3 @@ def crop_video(tr, params):
|
||||
time.sleep(2)
|
||||
progress_bar.empty()
|
||||
status_text.empty()
|
||||
|
||||
|
||||
def get_script_params():
    """Collect the script-related values from the Streamlit session state.

    Returns:
        Dict with the keys ``video_language``, ``video_clip_json_path``,
        ``video_origin_path``, ``video_name`` and ``video_plot``; a key that
        is missing from the session state maps to an empty string.
    """
    param_names = (
        'video_language',
        'video_clip_json_path',
        'video_origin_path',
        'video_name',
        'video_plot',
    )
    return {name: st.session_state.get(name, '') for name in param_names}
|
||||
|
||||
45
webui/components/system_settings.py
Normal file
45
webui/components/system_settings.py
Normal file
@ -0,0 +1,45 @@
|
||||
import streamlit as st
|
||||
import os
|
||||
import shutil
|
||||
from loguru import logger
|
||||
|
||||
from app.utils.utils import storage_dir
|
||||
|
||||
|
||||
def clear_directory(dir_path, tr):
    """Delete everything inside ``dir_path`` and report the outcome in the UI.

    Files are unlinked and sub-directories are removed recursively. A failure
    on a single entry is logged and the remaining entries are still processed.

    Args:
        dir_path: Directory whose contents are removed (the directory itself stays).
        tr: Translation callable used for the UI messages.
    """
    if not os.path.exists(dir_path):
        st.warning(tr("Directory does not exist"))
        return

    try:
        for entry in os.listdir(dir_path):
            entry_path = os.path.join(dir_path, entry)
            try:
                if os.path.isfile(entry_path):
                    os.unlink(entry_path)
                elif os.path.isdir(entry_path):
                    shutil.rmtree(entry_path)
            except Exception as e:
                logger.error(f"Failed to delete {entry_path}: {e}")
        st.success(tr("Directory cleared"))
        logger.info(f"Cleared directory: {dir_path}")
    except Exception as e:
        st.error(f"{tr('Failed to clear directory')}: {str(e)}")
        logger.error(f"Failed to clear directory {dir_path}: {e}")
|
||||
|
||||
def render_system_panel(tr):
    """Render the system settings expander with its three clean-up buttons.

    Each button clears one directory below the storage root.

    Args:
        tr: Translation callable used for the labels and messages.
    """
    # (button label key, directory relative to the storage root)
    cleanup_targets = (
        ("Clear frames", "temp/keyframes"),
        ("Clear clip videos", "temp/clip_video"),
        ("Clear tasks", "tasks"),
    )

    with st.expander(tr("System settings"), expanded=False):
        for column, (label, relative_dir) in zip(st.columns(3), cleanup_targets):
            with column:
                if st.button(tr(label), use_container_width=True):
                    clear_directory(os.path.join(storage_dir(), relative_dir), tr)
|
||||
@ -15,7 +15,7 @@
|
||||
"Crop Video": "裁剪视频",
|
||||
"Video File": "视频文件(:blue[1️⃣支持上传视频文件(限制2G) 2️⃣大文件建议直接导入 ./resource/videos 目录])",
|
||||
"Plot Description": "剧情描述 (:blue[可从 https://www.tvmao.com/ 获取])",
|
||||
"Generate Video Keywords": "点击使用AI根据**文案**生成【视频关键词】",
|
||||
"Generate Video Keywords": "点击使用AI根据**文案**生成【视频关键词】",
|
||||
"Please Enter the Video Subject": "请先填写视频文案",
|
||||
"Generating Video Script and Keywords": "AI正在生成视频文案和关键词...",
|
||||
"Generating Video Keywords": "AI正在生成视频关键词...",
|
||||
@ -95,7 +95,7 @@
|
||||
"Check Format": "脚本格式检查",
|
||||
"Script Loaded Successfully": "脚本加载成功",
|
||||
"Script format check passed": "脚本格式检查通过",
|
||||
"Script format check failed": "脚本格式检查失败",
|
||||
"Script format check failed": "脚本格式检查失败",
|
||||
"Failed to Load Script": "加载脚本失败",
|
||||
"Failed to Save Script": "保存脚本失败",
|
||||
"Script saved successfully": "脚本保存成功",
|
||||
@ -103,7 +103,6 @@
|
||||
"Video Quality": "视频质量",
|
||||
"Custom prompt for LLM, leave empty to use default prompt": "自定义提示词,留空则使用默认提示词",
|
||||
"Proxy Settings": "代理设置",
|
||||
"Language": "界面语言",
|
||||
"HTTP_PROXY": "HTTP 代理",
|
||||
"HTTPs_PROXY": "HTTPS 代理",
|
||||
"Vision Model Settings": "视频分析模型设置",
|
||||
@ -134,6 +133,61 @@
|
||||
"Unsupported provider": "不支持的提供商",
|
||||
"0: Keep the audio only, 1: Keep the original sound only, 2: Keep the original sound and audio": "0: 仅保留音频,1: 仅保留原声,2: 保留原声和音频",
|
||||
"Text model is not available": "文案生成模型不可用",
|
||||
"Text model is available": "文案生成模型可用"
|
||||
"Text model is available": "文案生成模型可用",
|
||||
"Upload Script": "上传脚本",
|
||||
"Upload Script File": "上传脚本文件",
|
||||
"Script Uploaded Successfully": "脚本上传成功",
|
||||
"Invalid JSON format": "无效的JSON格式",
|
||||
"Upload failed": "上传失败",
|
||||
"Video Subtitle Merge": "**合并视频与字幕**",
|
||||
"Upload Video and Subtitle Files": "上传视频和字幕文件",
|
||||
"Matched File Pairs": "已匹配的文件对",
|
||||
"Merge All Files": "合并所有文件",
|
||||
"Merge Function Not Implemented": "合并功能待实现",
|
||||
"No Matched Pairs Found": "未找到匹配的文件对",
|
||||
"Missing Subtitle": "缺少对应的字幕文件",
|
||||
"Missing Video": "缺少对应的视频文件",
|
||||
"All Uploaded Files": "所有上传的文件",
|
||||
"Order": "排序序号",
|
||||
"Reorder": "重新排序",
|
||||
"Merging files...": "正在合并文件...",
|
||||
"Merge completed!": "合并完成!",
|
||||
"Download Merged Video": "下载合并后的视频",
|
||||
"Download Merged Subtitle": "下载合并后的字幕",
|
||||
"Error during merge": "合并过程中出错",
|
||||
"Failed to generate merged video.": "生成合并视频失败。",
|
||||
"Failed to generate merged subtitle.": "生成合并字幕失败。",
|
||||
"Error reading merged video file": "读取合并后的视频文件时出错",
|
||||
"Error reading merged subtitle file": "读取合并后的字幕文件时出错",
|
||||
"Error processing video files. Please check if the videos are valid MP4 files.": "处理视频文件时出错。请检查视频是否为有效的MP4文件。",
|
||||
"Error processing subtitle files. Please check if the subtitles are valid SRT files.": "处理字幕文件时出错。请检查字幕是否为有效的SRT文件。",
|
||||
"Preview Merged Video": "预览合并后的视频",
|
||||
"Video Path": "视频路径",
|
||||
"Subtitle Path": "字幕路径",
|
||||
"Enable Proxy": "启用代理",
|
||||
"QwenVL model is available": "QwenVL 模型可用",
|
||||
"QwenVL model is not available": "QwenVL 模型不可用",
|
||||
"System settings": "系统设置",
|
||||
"Clear Cache": "清理缓存",
|
||||
"Cache cleared": "缓存清理完成",
|
||||
"storage directory does not exist": "storage目录不存在",
|
||||
"Failed to clear cache": "清理缓存失败",
|
||||
"Clear frames": "清理关键帧",
|
||||
"Clear clip videos": "清理裁剪视频",
|
||||
"Clear tasks": "清理任务",
|
||||
"Directory cleared": "目录清理完成",
|
||||
"Directory does not exist": "目录不存在",
|
||||
"Failed to clear directory": "清理目录失败",
|
||||
"Subtitle Preview": "字幕预览",
|
||||
"One-Click Transcribe": "一键转录",
|
||||
"Transcribing...": "正在转录中...",
|
||||
"Transcription Complete!": "转录完成!",
|
||||
"Transcription Failed. Please try again.": "转录失败,请重试。",
|
||||
"API rate limit exceeded. Please wait about an hour and try again.": "API 调用次数已达到限制,请等待约一小时后再试。",
|
||||
"Resources exhausted. Please try again later.": "资源已耗尽,请稍后再试。",
|
||||
"Transcription Failed": "转录失败",
|
||||
"Mergeable Files": "可合并文件数",
|
||||
"Subtitle Content": "字幕内容",
|
||||
"Merge Result Preview": "合并结果预览"
|
||||
}
|
||||
}
|
||||
|
||||
141
webui/tools/base.py
Normal file
141
webui/tools/base.py
Normal file
@ -0,0 +1,141 @@
|
||||
import os
|
||||
import streamlit as st
|
||||
from loguru import logger
|
||||
|
||||
from app.utils import gemini_analyzer, qwenvl_analyzer
|
||||
|
||||
|
||||
def create_vision_analyzer(provider, api_key, model, base_url):
    """
    Build the vision analyzer that matches the given provider.

    Args:
        provider: Provider name, either 'gemini' or 'qwenvl'
        api_key: API key for the provider
        model: Model name
        base_url: API base URL (only forwarded to the QwenVL analyzer)

    Returns:
        A gemini_analyzer.VisionAnalyzer or qwenvl_analyzer.QwenAnalyzer instance

    Raises:
        ValueError: If the provider is neither 'gemini' nor 'qwenvl'
    """
    if provider == 'gemini':
        # The Gemini analyzer is constructed without a base URL.
        return gemini_analyzer.VisionAnalyzer(model_name=model, api_key=api_key)

    if provider == 'qwenvl':
        qwen_options = {
            'model_name': model,
            'api_key': api_key,
            'base_url': base_url,
        }
        return qwenvl_analyzer.QwenAnalyzer(**qwen_options)

    raise ValueError(f"不支持的视觉分析提供商: {provider}")
|
||||
|
||||
|
||||
def get_script_params():
    """Collect the script-related values from the Streamlit session state.

    Returns:
        dict: One entry per script parameter; a key that is absent from the
        session state maps to an empty string.
    """
    param_keys = (
        'video_language',
        'video_clip_json_path',
        'video_origin_path',
        'video_name',
        'video_plot',
    )
    state = st.session_state
    return {key: state.get(key, '') for key in param_keys}
|
||||
|
||||
|
||||
def get_batch_timestamps(batch_files, prev_batch_files=None):
    """
    Work out the timestamp range covered by one batch of keyframe files.

    The time is read from the third underscore-separated field of each file
    name, e.g. ``keyframe_001253_000050100.jpg`` where ``000050100`` means
    00:00:50,100 (HHMMSSmmm). Shorter (older) fields are right-padded with
    zeros to nine digits.

    Args:
        batch_files: File paths of the current batch.
        prev_batch_files: File paths of the previous batch. When the current
            batch holds a single image, the last frame of the previous batch
            is used as the start of the range.

    Returns:
        tuple: (first_timestamp, last_timestamp, timestamp_range), where the
        timestamps use the ``HH:MM:SS,mmm`` format and the range is
        ``first-last``. All-zero timestamps are returned for an empty batch
        or for names that cannot be parsed.
    """
    zero_timestamp = "00:00:00,000"

    if not batch_files:
        logger.warning("Empty batch files")
        return zero_timestamp, zero_timestamp, f"{zero_timestamp}-{zero_timestamp}"

    def get_frame_files():
        """Return the base names of the first and last frame of the range."""
        if len(batch_files) == 1 and prev_batch_files:
            # Single-image batch: start from the last frame of the previous batch.
            first = os.path.basename(prev_batch_files[-1])
            last = os.path.basename(batch_files[0])
            logger.debug(f"单张图片批次,使用上一批次最后一帧作为首帧: {first}")
        else:
            first = os.path.basename(batch_files[0])
            last = os.path.basename(batch_files[-1])
        return first, last

    def extract_time(filename):
        """Return the (zero-padded) time field of a keyframe file name."""
        try:
            # Strip whatever extension the file has instead of assuming ".jpg".
            stem = os.path.splitext(filename)[0]
            time_str = stem.split('_')[2]
        except (IndexError, AttributeError) as e:
            logger.warning(f"Invalid filename format: {filename}, error: {e}")
            return "000000000"
        if len(time_str) < 9:  # older, shorter format
            time_str = time_str.ljust(9, '0')
        return time_str

    def format_timestamp(time_str):
        """Convert an HHMMSSmmm digit string to ``HH:MM:SS,mmm``."""
        try:
            if len(time_str) < 9:
                logger.warning(f"Invalid timestamp format: {time_str}")
                return zero_timestamp

            hours = int(time_str[0:2])
            minutes = int(time_str[2:4])
            seconds = int(time_str[4:6])
            # Exactly three digits, so an over-long field cannot produce
            # a millisecond value above 999.
            milliseconds = int(time_str[6:9])

            return f"{hours:02d}:{minutes:02d}:{seconds:02d},{milliseconds:03d}"
        except ValueError as e:
            logger.warning(f"时间戳格式转换失败: {time_str}, error: {e}")
            return zero_timestamp

    first_frame, last_frame = get_frame_files()

    first_timestamp = format_timestamp(extract_time(first_frame))
    last_timestamp = format_timestamp(extract_time(last_frame))
    timestamp_range = f"{first_timestamp}-{last_timestamp}"

    return first_timestamp, last_timestamp, timestamp_range
|
||||
|
||||
|
||||
def get_batch_files(keyframe_files, result, batch_size=5):
    """
    Return the keyframe files that belong to the batch described by ``result``.

    Args:
        keyframe_files: All keyframe file paths, in batch order.
        result: Analysis result dict; its 'batch_index' selects the batch.
        batch_size: Number of files per batch.

    Returns:
        The slice of ``keyframe_files`` for that batch (shorter for the last
        batch, empty when the index lies past the end).
    """
    first = result['batch_index'] * batch_size
    last = first + batch_size
    total = len(keyframe_files)
    if last > total:
        last = total
    return keyframe_files[slice(first, last)]
|
||||
293
webui/tools/generate_script_docu.py
Normal file
293
webui/tools/generate_script_docu.py
Normal file
@ -0,0 +1,293 @@
|
||||
# 纪录片脚本生成
|
||||
import os
|
||||
import json
|
||||
import time
|
||||
import asyncio
|
||||
import traceback
|
||||
import requests
|
||||
import streamlit as st
|
||||
from loguru import logger
|
||||
from requests.adapters import HTTPAdapter
|
||||
from urllib3.util.retry import Retry
|
||||
|
||||
from app.config import config
|
||||
from app.utils.script_generator import ScriptProcessor
|
||||
from app.utils import utils, video_processor, video_processor_v2, qwenvl_analyzer
|
||||
from webui.tools.base import create_vision_analyzer, get_batch_files, get_batch_timestamps
|
||||
|
||||
|
||||
def _list_keyframes(directory):
    """Return the sorted full paths of the ``.jpg`` files directly inside ``directory``."""
    return [
        os.path.join(directory, filename)
        for filename in sorted(os.listdir(directory))
        if filename.endswith('.jpg')
    ]


def generate_script_docu(tr, params):
    """
    Generate a documentary-style video script and store it in the session state.

    Steps: extract (or reuse cached) keyframes, analyse them in batches with
    the configured vision model, then turn the per-batch descriptions into a
    script with the configured text model. Progress and errors are shown
    through Streamlit widgets; the parsed script is stored under
    ``st.session_state['video_clip_json']``.

    Args:
        tr: Translation callable (currently unused by this function).
        params: Object exposing ``video_origin_path``, the source video path.
    """
    import shutil  # only needed to clean up after a failed keyframe extraction

    progress_bar = st.progress(0)
    status_text = st.empty()

    def update_progress(progress: float, message: str = ""):
        """Update the progress bar and the status line below it."""
        progress_bar.progress(progress)
        if message:
            status_text.text(f"{progress}% - {message}")
        else:
            status_text.text(f"进度: {progress}%")

    try:
        with st.spinner("正在生成脚本..."):
            if not params.video_origin_path:
                st.error("请先选择视频文件")
                return

            # =================== Keyframe extraction ===================
            update_progress(10, "正在提取关键帧...")

            # Keyframes are cached per video; the cache key includes the
            # file's modification time so an edited video is re-extracted.
            keyframes_dir = os.path.join(utils.temp_dir(), "keyframes")
            video_hash = utils.md5(params.video_origin_path + str(os.path.getmtime(params.video_origin_path)))
            video_keyframes_dir = os.path.join(keyframes_dir, video_hash)

            keyframe_files = []
            if os.path.exists(video_keyframes_dir):
                keyframe_files = _list_keyframes(video_keyframes_dir)

                if keyframe_files:
                    logger.info(f"使用已缓存的关键帧: {video_keyframes_dir}")
                    st.info(f"使用已缓存的关键帧,如需重新提取请删除目录: {video_keyframes_dir}")
                    update_progress(20, f"使用已缓存关键帧,共 {len(keyframe_files)} 帧")

            # No cached keyframes: extract them now.
            if not keyframe_files:
                try:
                    os.makedirs(video_keyframes_dir, exist_ok=True)

                    if config.frames.get("version") == "v2":
                        processor = video_processor_v2.VideoProcessor(params.video_origin_path)
                        processor.process_video_pipeline(
                            output_dir=video_keyframes_dir,
                            skip_seconds=st.session_state.get('skip_seconds'),
                            threshold=st.session_state.get('threshold')
                        )
                    else:
                        processor = video_processor.VideoProcessor(params.video_origin_path)
                        processor.process_video(
                            output_dir=video_keyframes_dir,
                            skip_seconds=0
                        )

                    keyframe_files = _list_keyframes(video_keyframes_dir)

                    if not keyframe_files:
                        raise Exception("未提取到任何关键帧")

                    update_progress(20, f"关键帧提取完成,共 {len(keyframe_files)} 帧")

                except Exception as e:
                    # Remove the partially filled directory so that a later
                    # run does not mistake it for a valid cache.
                    try:
                        if os.path.exists(video_keyframes_dir):
                            shutil.rmtree(video_keyframes_dir)
                    except Exception as cleanup_err:
                        logger.error(f"清理失败的关键帧目录时出错: {cleanup_err}")

                    raise Exception(f"关键帧提取失败: {str(e)}") from e

            # An unset provider becomes '' and is rejected below with the
            # "unsupported provider" error instead of crashing on None.lower().
            vision_llm_provider = (st.session_state.get('vision_llm_providers') or '').lower()
            logger.debug(f"Vision LLM 提供商: {vision_llm_provider}")

            try:
                # =================== Vision analyzer setup ===================
                update_progress(30, "正在初始化视觉分析器...")

                if vision_llm_provider == 'gemini':
                    vision_api_key = st.session_state.get('vision_gemini_api_key')
                    vision_model = st.session_state.get('vision_gemini_model_name')
                    vision_base_url = st.session_state.get('vision_gemini_base_url')
                elif vision_llm_provider == 'qwenvl':
                    vision_api_key = st.session_state.get('vision_qwenvl_api_key')
                    vision_model = st.session_state.get('vision_qwenvl_model_name', 'qwen-vl-max-latest')
                    vision_base_url = st.session_state.get('vision_qwenvl_base_url',
                                                           'https://dashscope.aliyuncs.com/compatible-mode/v1')
                else:
                    raise ValueError(f"不支持的视觉分析提供商: {vision_llm_provider}")

                analyzer = create_vision_analyzer(
                    provider=vision_llm_provider,
                    api_key=vision_api_key,
                    model=vision_model,
                    base_url=vision_base_url
                )

                update_progress(40, "正在分析关键帧...")

                # =================== Run the async analysis ===================
                # Fall back to 5 (the default of get_batch_files) when neither
                # the session nor the config provides a batch size.
                vision_batch_size = (
                    st.session_state.get('vision_batch_size')
                    or config.frames.get("vision_batch_size")
                    or 5
                )

                loop = asyncio.new_event_loop()
                asyncio.set_event_loop(loop)
                try:
                    results = loop.run_until_complete(
                        analyzer.analyze_images(
                            images=keyframe_files,
                            prompt=config.app.get('vision_analysis_prompt'),
                            batch_size=vision_batch_size
                        )
                    )
                finally:
                    # Close the loop even when the analysis fails.
                    loop.close()

                # =================== Collect the analysis results ===================
                update_progress(60, "正在整理分析结果...")

                frame_analysis = ""
                frame_content_list = []
                prev_batch_files = None

                for result in results:
                    if 'error' in result:
                        # A failed batch has nothing usable to add; skip it
                        # instead of reading its 'response'.
                        logger.warning(f"批次 {result['batch_index']} 处理出现警告: {result['error']}")
                        continue

                    batch_files = get_batch_files(keyframe_files, result, vision_batch_size)
                    logger.debug(f"批次 {result['batch_index']} 处理完成,共 {len(batch_files)} 张图片")

                    first_timestamp, last_timestamp, timestamp_range = get_batch_timestamps(
                        batch_files, prev_batch_files
                    )
                    logger.debug(f"处理时间戳: {first_timestamp}-{last_timestamp}")

                    # Plain-text dump of the analysis, one section per batch.
                    frame_analysis += f"\n=== {first_timestamp}-{last_timestamp} ===\n"
                    frame_analysis += result['response']
                    frame_analysis += "\n"

                    # Structured entry that is handed to the script processor.
                    frame_content_list.append({
                        "timestamp": timestamp_range,
                        "picture": result['response'],
                        "narration": "",
                        "OST": 2
                    })
                    logger.debug(f"添加帧内容: 时间范围={timestamp_range}, 分析结果长度={len(result['response'])}")

                    prev_batch_files = batch_files

                if not frame_analysis.strip():
                    raise Exception("未能生成有效的帧分析结果")

                analysis_path = os.path.join(utils.temp_dir(), "frame_analysis.txt")
                with open(analysis_path, 'w', encoding='utf-8') as f:
                    f.write(frame_analysis)

                update_progress(70, "正在生成脚本...")

                # Text-generation settings come from the application config.
                text_provider = config.app.get('text_llm_provider', 'gemini').lower()
                text_api_key = config.app.get(f'text_{text_provider}_api_key')
                text_model = config.app.get(f'text_{text_provider}_model_name')
                text_base_url = config.app.get(f'text_{text_provider}_base_url')

                if not frame_content_list:
                    raise Exception("没有有效的帧内容可以处理")

                # =================== Generate the narration ===================
                update_progress(80, "正在生成文案...")

                # Best-effort configuration check against the Narrato API; a
                # failure here must not abort script generation.
                # NOTE(review): this request sends the vision/text API keys to
                # `narrato_api_url` - confirm that this is intended.
                api_params = {
                    "vision_api_key": vision_api_key,
                    "vision_model_name": vision_model,
                    "vision_base_url": vision_base_url or "",
                    "text_api_key": text_api_key,
                    "text_model_name": text_model,
                    "text_base_url": text_base_url or ""
                }
                headers = {
                    'accept': 'application/json',
                    'Content-Type': 'application/json'
                }
                retry_strategy = Retry(
                    total=3,
                    backoff_factor=1,
                    status_forcelist=[500, 502, 503, 504]
                )
                try:
                    with requests.Session() as session:
                        session.mount("https://", HTTPAdapter(max_retries=retry_strategy))
                        session.post(
                            f"{config.app.get('narrato_api_url')}/video/config",
                            headers=headers,
                            json=api_params,
                            timeout=30,
                            verify=True
                        )
                except Exception as e:
                    logger.warning(f"配置校验请求失败(已忽略): {e}")

                custom_prompt = st.session_state.get('custom_prompt', '')
                processor = ScriptProcessor(
                    model_name=text_model,
                    api_key=text_api_key,
                    prompt=custom_prompt,
                    base_url=text_base_url or "",
                    video_theme=st.session_state.get('video_theme', '')
                )

                script_result = processor.process_frames(frame_content_list)

                # Serialise inside the try block so that an unserialisable
                # result is reported like any other analysis failure.
                script = json.dumps(script_result, ensure_ascii=False, indent=2)

            except Exception as e:
                logger.exception(f"大模型处理过程中发生错误\n{traceback.format_exc()}")
                raise Exception(f"分析失败: {str(e)}") from e

            logger.info("脚本生成完成")
            st.session_state['video_clip_json'] = json.loads(script)
            update_progress(90, "脚本生成完成")

        time.sleep(0.1)
        progress_bar.progress(100)
        status_text.text("脚本生成完成!")
        st.success("视频脚本生成成功!")

    except Exception as err:
        st.error(f"生成过程中发生错误: {str(err)}")
        logger.exception(f"生成脚本时发生错误\n{traceback.format_exc()}")
    finally:
        # Leave the final status visible briefly, then remove the widgets.
        time.sleep(2)
        progress_bar.empty()
        status_text.empty()
|
||||
115
webui/utils/merge_video.py
Normal file
115
webui/utils/merge_video.py
Normal file
@ -0,0 +1,115 @@
|
||||
"""
|
||||
合并视频和字幕文件
|
||||
"""
|
||||
from moviepy.editor import VideoFileClip, concatenate_videoclips
|
||||
import pysrt
|
||||
import os
|
||||
|
||||
|
||||
def get_video_duration(video_path):
    """Return the duration, in seconds, of the video at ``video_path``.

    The clip is closed before returning, including when reading the
    duration raises.
    """
    video = VideoFileClip(video_path)
    try:
        return video.duration
    finally:
        video.close()
|
||||
|
||||
|
||||
def adjust_subtitle_timing(subtitle_path, time_offset):
    """Load an SRT file and shift every subtitle by ``time_offset`` seconds.

    Args:
        subtitle_path: Path of the SRT file to load.
        time_offset: Offset in seconds (may be fractional) added to the start
            and end time of every subtitle.

    Returns:
        The loaded subtitles with shifted timings; the file on disk is not
        modified.
    """
    subs = pysrt.open(subtitle_path)

    # Convert once to whole milliseconds. Splitting a float offset into
    # h/m/s/ms parts with int() truncation can lose a millisecond
    # (e.g. 1.001 s -> 0 ms) and gives wrong results for negative offsets.
    offset_ms = int(round(time_offset * 1000))
    subs.shift(milliseconds=offset_ms)

    return subs
|
||||
|
||||
|
||||
def merge_videos_and_subtitles(video_paths, subtitle_paths, output_video_path, output_subtitle_path):
    """Concatenate videos and merge their subtitles into one video and one SRT.

    Videos are joined in the given order. The subtitles of every video after
    the first are shifted by the total duration of the videos before it.

    Args:
        video_paths: Video files to concatenate, in order.
        subtitle_paths: SRT files, one per video, in the same order.
        output_video_path: Destination of the merged video. If this file
            already exists, the video merge is skipped.
        output_subtitle_path: Destination of the merged SRT file.

    Raises:
        ValueError: If the number of videos and subtitle files differ.
    """
    if len(video_paths) != len(subtitle_paths):
        raise ValueError("视频文件数量与字幕文件数量不匹配")

    video_clips = []
    final_video = None
    accumulated_duration = 0
    merged_subs = pysrt.SubRipFile()

    try:
        for i, (video_path, subtitle_path) in enumerate(zip(video_paths, subtitle_paths)):
            print(f"处理视频 {i + 1}/{len(video_paths)}: {video_path}")
            video_clip = VideoFileClip(video_path)
            video_clips.append(video_clip)

            print(f"处理字幕 {i + 1}/{len(subtitle_paths)}: {subtitle_path}")
            if i == 0:
                # The first subtitle file needs no offset.
                current_subs = pysrt.open(subtitle_path)
            else:
                # Later files start where the preceding videos end.
                current_subs = adjust_subtitle_timing(subtitle_path, accumulated_duration)

            merged_subs.extend(current_subs)
            accumulated_duration += video_clip.duration

        # Do not merge again when the output video already exists.
        if not os.path.exists(output_video_path):
            print("合并视频中...")
            final_video = concatenate_videoclips(video_clips)

            print("保存合并后的视频...")
            final_video.write_videofile(output_video_path, audio_codec='aac')

        print("保存合并后的字幕...")
        # Every source file numbers its subtitles from 1; renumber so the
        # merged file has unique, consecutive indices.
        merged_subs.clean_indexes()
        merged_subs.save(output_subtitle_path, encoding='utf-8')

        print("合并完成")

    finally:
        # Release the concatenated clip as well as the source clips.
        if final_video is not None:
            final_video.close()
        for clip in video_clips:
            clip.close()
|
||||
|
||||
|
||||
def main():
    """Example usage: merge temp/1..5.mp4 and temp/1..5.srt into one video and one SRT."""
    clip_numbers = range(1, 6)
    video_paths = [f"temp/{number}.mp4" for number in clip_numbers]
    subtitle_paths = [f"temp/{number}.srt" for number in clip_numbers]

    merge_videos_and_subtitles(
        video_paths,
        subtitle_paths,
        "temp/merged_video.mp4",
        "temp/merged_subtitle.srt",
    )


if __name__ == "__main__":
    main()
|
||||
100
webui/utils/vision_analyzer.py
Normal file
100
webui/utils/vision_analyzer.py
Normal file
@ -0,0 +1,100 @@
|
||||
import logging
|
||||
from typing import List, Dict, Any, Optional
|
||||
from app.utils import gemini_analyzer, qwenvl_analyzer
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class VisionAnalyzer:
    """Wrapper that holds one provider-specific vision analyzer.

    Call ``initialize_gemini`` or ``initialize_qwenvl`` first;
    ``analyze_images`` then delegates to the selected analyzer.
    """

    def __init__(self):
        # Everything stays None until one of the initialize_* methods runs.
        self.provider = None
        self.api_key = None
        self.model = None
        self.base_url = None
        self.analyzer = None

    def initialize_gemini(self, api_key: str, model: str, base_url: str) -> None:
        """
        Set up the Gemini vision analyzer.

        Args:
            api_key: Gemini API key
            model: Model name
            base_url: API base URL (stored on the instance; the Gemini
                analyzer is constructed without it)
        """
        self.provider = 'gemini'
        self.api_key = api_key
        self.model = model
        self.base_url = base_url
        self.analyzer = gemini_analyzer.VisionAnalyzer(
            model_name=model,
            api_key=api_key
        )

    def initialize_qwenvl(self, api_key: str, model: str, base_url: str) -> None:
        """
        Set up the QwenVL vision analyzer.

        Args:
            api_key: Aliyun API key
            model: Model name
            base_url: API base URL; forwarded to the analyzer when it is set
        """
        self.provider = 'qwenvl'
        self.api_key = api_key
        self.model = model
        self.base_url = base_url

        analyzer_kwargs = {'model_name': model, 'api_key': api_key}
        if base_url:
            # Forward a configured endpoint instead of silently dropping it;
            # when it is unset the analyzer is built as before.
            analyzer_kwargs['base_url'] = base_url
        self.analyzer = qwenvl_analyzer.QwenAnalyzer(**analyzer_kwargs)

    async def analyze_images(self, images: List[str], prompt: str, batch_size: int = 5) -> List[Dict[str, Any]]:
        """
        Analyse the given images with the initialized analyzer.

        Args:
            images: Image file paths
            prompt: Analysis prompt
            batch_size: Number of images per batch, 5 by default

        Returns:
            Whatever the underlying analyzer returns - presumably one result
            dict per batch; verify against the analyzer implementation.

        Raises:
            ValueError: If no analyzer has been initialized
        """
        if not self.analyzer:
            raise ValueError("未初始化视觉分析器")

        return await self.analyzer.analyze_images(
            images=images,
            prompt=prompt,
            batch_size=batch_size
        )
|
||||
|
||||
def create_vision_analyzer(provider: str, **kwargs) -> VisionAnalyzer:
    """
    Create a VisionAnalyzer and initialize it for the given provider.

    Args:
        provider: Provider name ('gemini' or 'qwenvl'), case-insensitive
        **kwargs: Provider settings; 'api_key', 'model' and 'base_url' are
            read, and a missing one is passed on as None

    Returns:
        VisionAnalyzer: The initialized analyzer wrapper

    Raises:
        ValueError: If the provider is not supported
    """
    wrapper = VisionAnalyzer()
    provider_name = provider.lower()

    if provider_name not in ('gemini', 'qwenvl'):
        raise ValueError(f"不支持的视觉分析提供商: {provider}")

    settings = {
        'api_key': kwargs.get('api_key'),
        'model': kwargs.get('model'),
        'base_url': kwargs.get('base_url'),
    }
    if provider_name == 'gemini':
        wrapper.initialize_gemini(**settings)
    else:
        wrapper.initialize_qwenvl(**settings)

    return wrapper
|
||||
Loading…
x
Reference in New Issue
Block a user