Merge branch 'main' of github.com:linyqh/NarratoAI

This commit is contained in:
linyqh 2025-07-06 23:03:33 +08:00
commit db2696f1b6
83 changed files with 6440 additions and 2180 deletions

View File

@ -0,0 +1,157 @@
# Auto-creates a GitHub Release whenever the `project_version` file changes on main.
# Release notes are drafted by an LLM from the recent commit subjects.
name: Auto Release Generator

on:
  push:
    branches:
      - main
    paths:
      - 'project_version'  # 确保路径准确,不使用通配符

jobs:
  check-version-and-release:
    runs-on: ubuntu-latest
    permissions:
      contents: write       # 用于创建 releases
      pull-requests: write  # 可能需要的额外权限
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0  # 获取完整历史以检查变更

      - name: Debug Environment
        run: |
          echo "工作目录内容:"
          ls -la
          echo "project_version 文件内容:"
          cat project_version || echo "文件不存在"

      # Compares project_version against the previous commit; emits
      # version_changed / current_version outputs for the later steps.
      - name: Check if version changed
        id: check-version
        run: |
          # 获取当前版本号
          if [ -f "project_version" ]; then
            CURRENT_VERSION=$(cat project_version)
            echo "Current version: $CURRENT_VERSION"
            # 获取上一个提交中的版本号
            git fetch origin main
            if git show HEAD~1:project_version &>/dev/null; then
              PREVIOUS_VERSION=$(git show HEAD~1:project_version)
              echo "Previous version from commit: $PREVIOUS_VERSION"
              if [[ "$CURRENT_VERSION" != "$PREVIOUS_VERSION" ]]; then
                echo "Version changed from $PREVIOUS_VERSION to $CURRENT_VERSION"
                echo "version_changed=true" >> $GITHUB_OUTPUT
                echo "current_version=$CURRENT_VERSION" >> $GITHUB_OUTPUT
              else
                echo "Version unchanged"
                echo "version_changed=false" >> $GITHUB_OUTPUT
              fi
            else
              echo "Cannot find previous version, assuming first release"
              echo "version_changed=true" >> $GITHUB_OUTPUT
              echo "current_version=$CURRENT_VERSION" >> $GITHUB_OUTPUT
            fi
          else
            echo "project_version file not found"
            echo "version_changed=false" >> $GITHUB_OUTPUT
          fi

      - name: Set up Python
        if: steps.check-version.outputs.version_changed == 'true'
        uses: actions/setup-python@v4
        with:
          python-version: '3.10'

      - name: Install OpenAI SDK
        if: steps.check-version.outputs.version_changed == 'true'
        run: pip install openai

      - name: Get commits since last release
        if: steps.check-version.outputs.version_changed == 'true'
        id: get-commits
        run: |
          # 直接获取最近 13 个提交(注释与下面的 git log -13 保持一致)
          echo "Getting last 13 commits"
          COMMITS=$(git log -13 --pretty=format:"%s")
          echo "Commits to be included in release notes:"
          echo "$COMMITS"
          echo "commits<<EOF" >> $GITHUB_OUTPUT
          echo "$COMMITS" >> $GITHUB_OUTPUT
          echo "EOF" >> $GITHUB_OUTPUT

      - name: Generate release notes with AI
        if: steps.check-version.outputs.version_changed == 'true'
        id: generate-notes
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          OPENAI_BASE_URL: https://api.siliconflow.cn/v1
          CURRENT_VERSION: ${{ steps.check-version.outputs.current_version }}
          # Pass commit subjects through an env var instead of expanding the
          # expression inside the shell script (prevents script injection via
          # crafted commit messages).
          COMMITS: ${{ steps.get-commits.outputs.commits }}
        run: |
          cat > generate_release_notes.py << 'EOF'
          import os
          import sys
          from openai import OpenAI

          # 设置OpenAI客户端
          client = OpenAI(
              api_key=os.environ.get("OPENAI_API_KEY"),
              base_url=os.environ.get("OPENAI_BASE_URL")
          )

          # 获取提交信息和版本号
          commits = sys.stdin.read()
          version = os.environ.get("CURRENT_VERSION")

          # 调用API生成发布说明
          try:
              response = client.chat.completions.create(
                  model="deepseek-ai/DeepSeek-V3",
                  messages=[
                      {"role": "system", "content": "你是一个专业的软件发布说明生成助手。请根据提供的git提交信息生成一个结构化的发布说明包括新功能、改进、修复的bug等类别。使用中文回复。"},
                      {"role": "user", "content": f"请根据以下git提交信息,生成一个版本{version}的发布说明,内容详细且完整,相似的提交信息不要重复出现: \n\n{commits}"}
                  ],
                  temperature=0.7,
              )
              release_notes = response.choices[0].message.content
              print(f"commits: \n{commits}")
              print(f"大模型总结的发布说明: \n{release_notes}")
          except Exception as e:
              print(f"Error calling OpenAI API: {e}")
              release_notes = f"# 版本 {version} 发布\n\n## 更新内容\n\n"
              # 简单处理提交信息
              for line in commits.strip().split("\n"):
                  if line:
                      release_notes += f"- {line}\n"

          # 输出生成的发布说明
          print(release_notes)

          # 保存到GitHub输出
          with open(os.environ.get("GITHUB_OUTPUT"), "a") as f:
              f.write("release_notes<<RELEASE_NOTES_EOF\n")
              f.write(release_notes)
              f.write("\nRELEASE_NOTES_EOF\n")
          EOF
          # Feed commits via stdin from the env var (no expression expansion in shell).
          printf '%s\n' "$COMMITS" | python generate_release_notes.py

      - name: Debug release notes
        if: steps.check-version.outputs.version_changed == 'true'
        run: |
          echo "Generated release notes:"
          echo "${{ steps.generate-notes.outputs.release_notes }}"

      - name: Create GitHub Release
        if: steps.check-version.outputs.version_changed == 'true'
        uses: softprops/action-gh-release@v1
        with:
          tag_name: v${{ steps.check-version.outputs.current_version }}
          name: v${{ steps.check-version.outputs.current_version }}
          body: ${{ steps.generate-notes.outputs.release_notes }}
          draft: false
          prerelease: false
          token: ${{ secrets.GIT_TOKEN }}

View File

@ -19,6 +19,6 @@ jobs:
env:
GITHUB_TOKEN: ${{ secrets.GIT_TOKEN }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
OPENAI_API_ENDPOINT: https://api.groq.com/openai/v1
MODEL: llama-3.1-70b-versatile
OPENAI_API_ENDPOINT: https://api.siliconflow.cn/v1
MODEL: deepseek-ai/DeepSeek-V3
LANGUAGE: Chinese

View File

@ -0,0 +1,197 @@
# On each published release: polish the release notes with an LLM and post
# them to a Discord channel as a webhook embed.
name: Discord Release Notification

on:
  release:
    types: [published]

jobs:
  notify-discord:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: pip install openai discord-webhook requests

      - name: Enhance release notes and send to Discord
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          OPENAI_BASE_URL: https://api.siliconflow.cn/v1
          DISCORD_WEBHOOK_URL: ${{ secrets.DISCORD_WEBHOOK_URL }}
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          cat > send_discord_notification.py << 'EOF'
          import os
          import sys
          import json
          from openai import OpenAI
          import requests
          from datetime import datetime
          from discord_webhook import DiscordWebhook, DiscordEmbed

          # 设置OpenAI客户端
          client = OpenAI(
              api_key=os.environ.get("OPENAI_API_KEY"),
              base_url=os.environ.get("OPENAI_BASE_URL")
          )

          # 获取GitHub release信息
          github_token = os.environ.get("GITHUB_TOKEN")
          repo = os.environ.get("GITHUB_REPOSITORY")

          # 直接从GitHub API获取最新release
          headers = {"Authorization": f"token {github_token}"}
          response = requests.get(f"https://api.github.com/repos/{repo}/releases/latest", headers=headers)
          if response.status_code != 200:
              print(f"Error fetching release info: {response.status_code}")
              print(response.text)
              sys.exit(1)
          release_info = response.json()

          # 提取需要的信息
          release_notes = release_info.get("body", "无发布说明")
          version = release_info.get("tag_name", "未知版本")

          # 安全地解析发布日期
          published_at = release_info.get("published_at")
          if published_at:
              try:
                  release_date = datetime.strptime(published_at, "%Y-%m-%dT%H:%M:%SZ").strftime("%Y年%m月%d日")
              except ValueError:
                  release_date = "未知日期"
          else:
              release_date = "未知日期"

          # 使用大模型润色发布说明
          try:
              response = client.chat.completions.create(
                  model="deepseek-ai/DeepSeek-V3",
                  messages=[
                      {"role": "system", "content": "你是一个专业的软件发布公告优化助手。请优化以下发布说明,使其更加生动、专业,并明确区分新功能、优化内容、修复内容和移除内容等类别。保持原有信息的完整性,同时增强可读性和专业性。使用中文回复。\n\n重要Discord不支持复杂的Markdown格式因此请使用简单的格式化\n1. 使用**粗体**和*斜体*而不是Markdown标题\n2. 使用简单的列表符号而不是Markdown列表\n3. 避免使用#、##等标题格式\n4. 不要使用表格、代码块等复杂格式\n5. 确保段落之间有空行\n6. 使用简单的分隔符(如 ------)来分隔不同部分"},
                      {"role": "user", "content": f"请优化以下版本{version}的发布说明使其更适合在Discord社区发布。请记住Discord不支持复杂的Markdown格式所以使用简单的格式化方式\n\n{release_notes}"}
                  ],
                  temperature=0.7,
              )
              enhanced_notes = response.choices[0].message.content
              print(f"大模型润色后的发布说明: \n{enhanced_notes}")
          except Exception as e:
              print(f"Error calling OpenAI API: {e}")
              enhanced_notes = release_notes  # 如果API调用失败使用原始发布说明

          # 创建Discord消息
          webhook_url = os.environ.get("DISCORD_WEBHOOK_URL")
          if not webhook_url:
              print("Error: DISCORD_WEBHOOK_URL not set")
              sys.exit(1)
          webhook = DiscordWebhook(url=webhook_url)

          # 创建嵌入式消息
          embed = DiscordEmbed(
              title=f"🚀 NarratoAI {version} 发布公告",
              description=f"发布日期: {release_date}",
              color="5865F2"  # Discord蓝色
          )

          # 处理发布说明确保不超过Discord的字段限制
          # Discord字段值限制为1024个字符
          MAX_FIELD_LENGTH = 1024

          # 如果内容很短,直接添加
          if enhanced_notes and len(enhanced_notes) <= MAX_FIELD_LENGTH:
              embed.add_embed_field(name="📋 更新内容", value=enhanced_notes)
          elif enhanced_notes:
              # 尝试按段落或明显的分隔符分割内容
              sections = []
              # 检查是否有明显的新功能、优化、修复等部分
              if "**新增功能**" in enhanced_notes or "**新功能**" in enhanced_notes:
                  parts = enhanced_notes.split("**新增功能**", 1)
                  if len(parts) > 1:
                      intro = parts[0].strip()
                      if intro:
                          sections.append(("📋 更新概述", intro))
                      rest = "**新增功能**" + parts[1]
                      # 进一步分割剩余部分
                      feature_end = -1
                      for marker in ["**优化内容**", "**性能优化**", "**修复内容**", "**bug修复**", "**问题修复**"]:
                          pos = rest.lower().find(marker.lower())
                          if pos != -1 and (feature_end == -1 or pos < feature_end):
                              feature_end = pos
                      if feature_end != -1:
                          sections.append(("✨ 新增功能", rest[:feature_end].strip()))
                          rest = rest[feature_end:]
                      else:
                          sections.append(("✨ 新增功能", rest.strip()))
                          rest = ""
                      # 继续分割剩余部分
                      if rest:
                          optimize_end = -1
                          for marker in ["**修复内容**", "**bug修复**", "**问题修复**"]:
                              pos = rest.lower().find(marker.lower())
                              if pos != -1 and (optimize_end == -1 or pos < optimize_end):
                                  optimize_end = pos
                          if optimize_end != -1:
                              sections.append(("⚡ 优化内容", rest[:optimize_end].strip()))
                              sections.append(("🔧 修复内容", rest[optimize_end:].strip()))
                          else:
                              sections.append(("⚡ 优化内容", rest.strip()))
              else:
                  # 如果没有明显的结构,按长度分割
                  chunks = [enhanced_notes[i:i+MAX_FIELD_LENGTH] for i in range(0, len(enhanced_notes), MAX_FIELD_LENGTH)]
                  for i, chunk in enumerate(chunks):
                      if i == 0:
                          sections.append(("📋 更新内容", chunk))
                      else:
                          # Fix: close the full-width parenthesis in continuation titles.
                          sections.append((f"📋 更新内容(续{i})", chunk))

              # 添加所有部分到embed
              # NOTE(review): Discord caps an embed at 25 fields / 6000 chars total;
              # very long notes may still be rejected — confirm whether an overall
              # truncation guard is needed.
              for name, content in sections:
                  if len(content) > MAX_FIELD_LENGTH:
                      # 如果单个部分仍然过长,进一步分割
                      sub_chunks = [content[i:i+MAX_FIELD_LENGTH] for i in range(0, len(content), MAX_FIELD_LENGTH)]
                      for i, chunk in enumerate(sub_chunks):
                          if i == 0:
                              embed.add_embed_field(name=name, value=chunk)
                          else:
                              embed.add_embed_field(name=f"{name}(续{i})", value=chunk)
                  else:
                      embed.add_embed_field(name=name, value=content)
          else:
              embed.add_embed_field(name="📋 更新内容", value="无详细更新内容")

          # 添加下载链接
          html_url = release_info.get("html_url", "")
          if html_url:
              embed.add_embed_field(name="📥 下载链接", value=html_url, inline=False)

          # 设置页脚
          embed.set_footer(text=f"NarratoAI 团队 • {release_date}")
          embed.set_timestamp()

          # 添加嵌入式消息到webhook
          webhook.add_embed(embed)

          # 发送消息
          response = webhook.execute()
          if response:
              print(f"Discord notification sent with status code: {response.status_code}")
          else:
              print("Failed to send Discord notification")
          EOF
          # 执行脚本
          python send_discord_notification.py

View File

@ -1,48 +0,0 @@
name: build_docker
on:
release:
types: [created] # 表示在创建新的 Release 时触发
workflow_dispatch:
jobs:
build_docker:
name: Build docker
runs-on: ubuntu-latest
steps:
- name: Remove unnecessary files
run: |
sudo rm -rf /usr/share/dotnet
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
- name: Checkout
uses: actions/checkout@v3
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to DockerHub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Extract project version
id: extract_version
run: |
project_version=$(grep 'project_version' config.example.toml | cut -d '"' -f 2)
echo "PROJECT_VERSION=$project_version" >> $GITHUB_ENV
- name: Build and push
id: docker_build
uses: docker/build-push-action@v6
with:
context: .
file: ./Dockerfile
push: true
platforms: linux/amd64,linux/arm64
tags: |
${{ secrets.DOCKERHUB_USERNAME }}/narratoai:${{ env.PROJECT_VERSION }}
${{ secrets.DOCKERHUB_USERNAME }}/narratoai:latest

View File

@ -1,40 +0,0 @@
name: Latest Changes
on:
pull_request_target:
branches:
- main
types:
- closed
workflow_dispatch:
inputs:
number:
description: PR number
required: true
debug_enabled:
description: "在启用 tmate 调试的情况下运行构建 (https://github.com/marketplace/actions/debugging-with-tmate)"
required: false
default: "false"
jobs:
latest-changes:
runs-on: ubuntu-latest
permissions:
pull-requests: read
steps:
- name: Dump GitHub context
env:
GITHUB_CONTEXT: ${{ toJson(github) }}
run: echo "$GITHUB_CONTEXT"
- uses: actions/checkout@v4
with:
# 允许将最新更改提交到主分支
token: ${{ secrets.GIT_TOKEN }}
- uses: tiangolo/latest-changes@0.3.2
with:
token: ${{ secrets.GIT_TOKEN }}
latest_changes_file: ./release-notes.md
latest_changes_header: "## Latest Changes"
end_regex: "^## "
debug_logs: true
label_header_prefix: "### "

View File

@ -1,22 +0,0 @@
name: Release Drafter
on:
push:
branches:
- main
pull_request:
types: [opened, reopened, synchronize]
permissions:
contents: read
jobs:
update_release_draft:
permissions:
contents: write
pull-requests: write
runs-on: ubuntu-latest
steps:
- uses: release-drafter/release-drafter@v5
env:
GITHUB_TOKEN: ${{ secrets.GIT_TOKEN }}

4
.gitignore vendored
View File

@ -32,4 +32,8 @@ resource/fonts/*.ttf
resource/fonts/*.otf
resource/srt/*.srt
app/models/faster-whisper-large-v2/*
app/models/faster-whisper-large-v3/*
app/models/bert/*
bug清单.md
task.md

View File

@ -1,197 +0,0 @@
<div align="center">
<h1 align="center" style="font-size: 2cm;"> NarratoAI 😎📽️ </h1>
<h3 align="center">An all-in-one AI-powered tool for film commentary and automated video editing.🎬🎞️ </h3>
<h3>📖 English | <a href="README.md">简体中文</a> | <a href="README-ja.md">日本語</a> </h3>
<div align="center">
[//]: # ( <a href="https://trendshift.io/repositories/8731" target="_blank"><img src="https://trendshift.io/api/badge/repositories/8731" alt="harry0703%2FNarratoAI | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>)
</div>
<br>
NarratoAI is an automated video narration tool that provides an all-in-one solution for script writing, automated video editing, voice-over, and subtitle generation, powered by LLM to enhance efficient content creation.
<br>
[![madewithlove](https://img.shields.io/badge/made_with-%E2%9D%A4-red?style=for-the-badge&labelColor=orange)](https://github.com/linyqh/NarratoAI)
[![GitHub license](https://img.shields.io/github/license/linyqh/NarratoAI?style=for-the-badge)](https://github.com/linyqh/NarratoAI/blob/main/LICENSE)
[![GitHub issues](https://img.shields.io/github/issues/linyqh/NarratoAI?style=for-the-badge)](https://github.com/linyqh/NarratoAI/issues)
[![GitHub stars](https://img.shields.io/github/stars/linyqh/NarratoAI?style=for-the-badge)](https://github.com/linyqh/NarratoAI/stargazers)
<a href="https://github.com/linyqh/NarratoAI/wiki" target="_blank">💬 Join the open source community to get project updates and the latest news.</a>
<h3>Home</h3>
![](docs/index-en.png)
<h3>Video Review Interface</h3>
![](docs/check-en.png)
</div>
## Future Plans 🥳
- [x] Windows Integration Pack Release
- [ ] Optimized the story generation process and improved the generation effect
- [ ] Support local large model MiniCPM-V
- [ ] Support local large model Qwen2-VL
- [ ] ...
## System Requirements 📦
- Recommended minimum: CPU with 4 cores or more, 8GB RAM or more, GPU is not required
- Windows 10 or MacOS 11.0 or above
## Quick Start 🚀
### 1. Apply for Google AI Studio Account
1. Visit https://aistudio.google.com/app/prompts/new_chat to apply for an account.
2. Click `Get API Key` to request an API Key.
3. Enter the obtained API Key into the `gemini_api_key` setting in the `config.example.toml` file.
### 2. Configure Proxy VPN
> The method to configure VPN is not restricted, as long as you can access Google's network. Here, `clash` is used as an example.
1. Note the port of the clash service, usually `http://127.0.0.1:7890`.
2. If the port is not `7890`, modify the `VPN_PROXY_URL` in the `docker-compose.yml` file to your proxy address.
```yaml
environment:
- "VPN_PROXY_URL=http://host.docker.internal:7890" # Change to your proxy port; host.docker.internal represents the IP of the physical machine.
```
3. (Optional) Or modify the `proxy` settings in the `config.example.toml` file.
```toml
[proxy]
### Use a proxy to access the Pexels API
### Format: "http://<username>:<password>@<proxy>:<port>"
### Example: "http://user:pass@proxy:1234"
### Doc: https://requests.readthedocs.io/en/latest/user/advanced/#proxies
http = "http://xx.xx.xx.xx:7890"
https = "http://xx.xx.xx.xx:7890"
```
### 3. Get Started 📥 with the Modpack (for Windows users)
NarratoAI Modpack v0.1.2 is released 🚀
Hurry up and follow the WeChat public account [NarratoAI助手] and reply to the keyword [整合包] to get the latest download link! Give it a try!
Note:
- Currently only available for Windows, Mac version is in development, Linux version will be available in a future release.
### 4. Get started 🐳 with docker (for Mac and Linux users)
#### ① clone project, Start Docker
```shell
git clone https://github.com/linyqh/NarratoAI.git
cd NarratoAI
docker-compose up
```
#### ② Access the Web Interface
Open your browser and go to http://127.0.0.1:8501
#### ③ Access the API Documentation
Open your browser and go to http://127.0.0.1:8080/docs or http://127.0.0.1:8080/redoc
## Usage
#### 1. Basic Configuration, Select Model, Enter API Key, and Choose Model
> Currently, only the `Gemini` model is supported. Other modes will be added in future updates. Contributions are welcome via [PR](https://github.com/linyqh/NarratoAI/pulls) to join in the development 🎉🎉🎉
<div align="center">
<img src="docs/img001-en.png" alt="001" width="1000"/>
</div>
#### 2. Select the Video for Narration and Click to Generate Video Script
> A demo video is included in the platform. To use your own video, place the mp4 file in the `resource/videos` directory and refresh your browser.
> Note: The filename can be anything, but it must not contain Chinese characters, special characters, spaces, backslashes, etc.
<div align="center">
<img src="docs/img002-en.png" alt="002" width="400"/>
</div>
#### 3. Save the Script and Start Editing
> After saving the script, refresh the browser, and the newly generated `.json` script file will appear in the script file dropdown. Select the json file and video to start editing.
<div align="center">
<img src="docs/img003-en.png" alt="003" width="400"/>
</div>
#### 4. Review the Video; if there are segments that don't meet the rules, click to regenerate or manually edit them.
<div align="center">
<img src="docs/img004-en.png" alt="003" width="1000"/>
</div>
#### 5. Configure Basic Video Parameters
<div align="center">
<img src="docs/img005-en.png" alt="003" width="700"/>
</div>
#### 6. Start Generating
<div align="center">
<img src="docs/img006-en.png" alt="003" width="1000"/>
</div>
#### 7. Video Generation Complete
<div align="center">
<img src="docs/img007-en.png" alt="003" width="1000"/>
</div>
## Development 💻
1. Install Dependencies
```shell
conda create -n narratoai python=3.10
conda activate narratoai
cd narratoai
pip install -r requirements.txt
```
2. Install ImageMagick
###### Windows:
- Download https://imagemagick.org/archive/binaries/ImageMagick-7.1.1-38-Q16-x64-static.exe
- Install the downloaded ImageMagick, ensuring you do not change the installation path
- Update `imagemagick_path` in the `config.toml` file to your actual installation path (typically `C:\Program Files\ImageMagick-7.1.1-Q16\magick.exe`)
###### MacOS:
```shell
brew install imagemagick
````
###### Ubuntu
```shell
sudo apt-get install imagemagick
```
###### CentOS
```shell
sudo yum install ImageMagick
```
3. initiate webui
```shell
streamlit run ./webui/Main.py --browser.serverAddress=127.0.0.1 --server.enableCORS=True --browser.gatherUsageStats=False
```
4. Access http://127.0.0.1:8501
## Feedback & Suggestions 📢
### 👏 1. You can submit [issues](https://github.com/linyqh/NarratoAI/issues) or [pull requests](https://github.com/linyqh/NarratoAI/pulls)
### 💬 2. [Join the open source community exchange group](https://github.com/linyqh/NarratoAI/wiki)
### 👉 3. [frequently asked questions](https://thread-marsupial-df8.notion.site/105866888dab80988650fa063b1df4eb)
## Reference Projects 📚
- https://github.com/FujiwaraChoki/MoneyPrinter
- https://github.com/harry0703/MoneyPrinterTurbo
This project was refactored based on the above projects with the addition of video narration features. Thanks to the original authors for their open-source spirit 🥳🥳🥳
## License 📝
Click to view the [`LICENSE`](LICENSE) file
## Star History
[![Star History Chart](https://api.star-history.com/svg?repos=linyqh/NarratoAI&type=Date)](https://star-history.com/#linyqh/NarratoAI&Date)

115
README-en.md Normal file
View File

@ -0,0 +1,115 @@
<div align="center">
<h1 align="center" style="font-size: 2cm;"> NarratoAI 😎📽️ </h1>
<h3 align="center">An all-in-one AI-powered tool for film commentary and automated video editing.🎬🎞️ </h3>
<h3>📖 English | <a href="README.md">简体中文</a> | <a href="README-ja.md">日本語</a> </h3>
<div align="center">
[//]: # ( <a href="https://trendshift.io/repositories/8731" target="_blank"><img src="https://trendshift.io/api/badge/repositories/8731" alt="harry0703%2FNarratoAI | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>)
</div>
<br>
NarratoAI is an automated video narration tool that provides an all-in-one solution for script writing, automated video editing, voice-over, and subtitle generation, powered by LLM to enhance efficient content creation.
<br>
[![madewithlove](https://img.shields.io/badge/made_with-%E2%9D%A4-red?style=for-the-badge&labelColor=orange)](https://github.com/linyqh/NarratoAI)
[![GitHub license](https://img.shields.io/github/license/linyqh/NarratoAI?style=for-the-badge)](https://github.com/linyqh/NarratoAI/blob/main/LICENSE)
[![GitHub issues](https://img.shields.io/github/issues/linyqh/NarratoAI?style=for-the-badge)](https://github.com/linyqh/NarratoAI/issues)
[![GitHub stars](https://img.shields.io/github/stars/linyqh/NarratoAI?style=for-the-badge)](https://github.com/linyqh/NarratoAI/stargazers)
<a href="https://discord.com/invite/V2pbAqqQNb" target="_blank">💬 Join the open source community to get project updates and the latest news.</a>
<h2><a href="https://p9mf6rjv3c.feishu.cn/wiki/SP8swLLZki5WRWkhuFvc2CyInDg?from=from_copylink" target="_blank">🎉🎉🎉 Official Documentation 🎉🎉🎉</a> </h2>
<h3>Home</h3>
![](docs/index-en.png)
<h3>Video Review Interface</h3>
![](docs/check-en.png)
</div>
## Latest News
- 2025.05.11 Released new version 0.6.0, supports **short drama commentary** and optimized editing process
- 2025.03.06 Released new version 0.5.2, supports DeepSeek R1 and DeepSeek V3 models for short drama mixing
- 2024.12.16 Released new version 0.3.9, supports Alibaba Qwen2-VL model for video understanding; supports short drama mixing
- 2024.11.24 Opened Discord community: https://discord.com/invite/V2pbAqqQNb
- 2024.11.11 Migrated open source community, welcome to join! [Join the official community](https://github.com/linyqh/NarratoAI/wiki)
- 2024.11.10 Released official documentation, details refer to [Official Documentation](https://p9mf6rjv3c.feishu.cn/wiki/SP8swLLZki5WRWkhuFvc2CyInDg)
- 2024.11.10 Released new version v0.3.5; optimized video editing process,
## Major Benefits 🎉
From now on, fully support DeepSeek model! Register to enjoy 20 million free tokens (worth 14 yuan platform quota), editing a 10-minute video only costs 0.1 yuan!
🔥 Quick benefits:
1⃣ Click the link to register: https://cloud.siliconflow.cn/i/pyOKqFCV
2⃣ Log in with your phone number, **be sure to fill in the invitation code: pyOKqFCV**
3⃣ Receive a 14 yuan quota, experience high cost-effective AI editing quickly!
💡 Low cost, high creativity:
Silicon Flow API Key can be integrated with one click, doubling intelligent editing efficiency!
(Note: The invitation code is the only proof for benefit collection, automatically credited after registration)
Immediately take action to unlock your AI productivity with "pyOKqFCV"!
😊 Update Steps:
Integration Package: Click update.bat one-click update script
Code Build: Use git pull to fetch the latest code
## Announcement 📢
_**Note⚠: Recently, someone has been impersonating the author on x (Twitter) to issue tokens on the pump.fun platform! This is a scam!!! Do not be deceived! Currently, NarratoAI has not made any official promotions on x (Twitter), please be cautious**_
Below is a screenshot of this person's x (Twitter) homepage
<img src="https://github.com/user-attachments/assets/c492ab99-52cd-4ba2-8695-1bd2073ecf12" alt="Screenshot_20250109_114131_Samsung Internet" style="width:30%; height:auto;">
## Future Plans 🥳
- [x] Windows Integration Pack Release
- [x] Optimized the story generation process and improved the generation effect
- [x] Released version 0.3.5 integration package
- [x] Support Alibaba Qwen2-VL large model for video understanding
- [x] Support short drama commentary
- [x] One-click merge materials
- [x] One-click transcription
- [x] One-click clear cache
- [ ] Support exporting to Jianying drafts
- [ ] Character face matching
- [ ] Support automatic matching based on voiceover, script, and video materials
- [ ] Support more TTS engines
- [ ] ...
## System Requirements 📦
- Recommended minimum: CPU with 4 cores or more, 8GB RAM or more, GPU is not required
- Windows 10/11 or MacOS 11.0 or above
- [Python 3.12+](https://www.python.org/downloads/)
## Feedback & Suggestions 📢
👏 1. You can submit [issue](https://github.com/linyqh/NarratoAI/issues) or [pull request](https://github.com/linyqh/NarratoAI/pulls)
💬 2. [Join the open source community exchange group](https://github.com/linyqh/NarratoAI/wiki)
📷 3. Follow the official account [NarratoAI助手] to grasp the latest news
## Reference Projects 📚
- https://github.com/FujiwaraChoki/MoneyPrinter
- https://github.com/harry0703/MoneyPrinterTurbo
This project was refactored based on the above projects with the addition of video narration features. Thanks to the original authors for their open-source spirit 🥳🥳🥳
## Buy the Author a Cup of Coffee ☕️
<div style="display: flex; justify-content: space-between;">
<img src="https://github.com/user-attachments/assets/5038ccfb-addf-4db1-9966-99415989fd0c" alt="Image 1" style="width: 350px; height: 350px; margin: auto;"/>
<img src="https://github.com/user-attachments/assets/07d4fd58-02f0-425c-8b59-2ab94b4f09f8" alt="Image 2" style="width: 350px; height: 350px; margin: auto;"/>
</div>
## License 📝
Click to view [`LICENSE`](LICENSE) file
## Star History
[![Star History Chart](https://api.star-history.com/svg?repos=linyqh/NarratoAI&type=Date)](https://star-history.com/#linyqh/NarratoAI&Date)

View File

@ -39,8 +39,15 @@ NarratoAIは、LLMを活用してスクリプト作成、自動ビデオ編集
- [x] Windows統合パックリリース
- [x] ストーリー生成プロセスの最適化、生成効果の向上
- [x] バージョン0.3.5統合パックリリース
- [ ] アリババQwen2-VL大規模モデルのビデオ理解サポート
- [ ] 短編ドラマの解説サポート
- [x] アリババQwen2-VL大規模モデルのビデオ理解サポート
- [x] 短編ドラマの解説サポート
- [x] 一クリックで素材を統合
- [x] 一クリックで文字起こし
- [x] 一クリックでキャッシュをクリア
- [ ] ジャン映草稿のエクスポートをサポート
- [ ] 主役の顔のマッチング
- [ ] 音声、スクリプト、ビデオ素材に基づいて自動マッチングをサポート
- [ ] より多くのTTSエンジンをサポート
- [ ] ...
## システム要件 📦

View File

@ -4,7 +4,7 @@
<h3 align="center">一站式 AI 影视解说+自动化剪辑工具🎬🎞️ </h3>
<h3>📖 <a href="README-cn.md">English</a> | 简体中文 | <a href="README-ja.md">日本語</a> </h3>
<h3>📖 <a href="README-en.md">English</a> | 简体中文 | <a href="README-ja.md">日本語</a> </h3>
<div align="center">
[//]: # ( <a href="https://trendshift.io/repositories/8731" target="_blank"><img src="https://trendshift.io/api/badge/repositories/8731" alt="harry0703%2FNarratoAI | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>)
@ -32,6 +32,7 @@ NarratoAI 是一个自动化影视解说工具基于LLM实现文案撰写、
</div>
## 最新资讯
- 2025.05.11 发布新版本 0.6.0,支持 **短剧解说** 和 优化剪辑流程
- 2025.03.06 发布新版本 0.5.2,支持 DeepSeek R1 和 DeepSeek V3 模型进行短剧混剪
- 2024.12.16 发布新版本 0.3.9,支持阿里 Qwen2-VL 模型理解视频;支持短剧混剪
- 2024.11.24 开通 discord 社群https://discord.com/invite/V2pbAqqQNb
@ -75,16 +76,17 @@ _**注意⚠️:近期在 x (推特) 上发现有人冒充作者在 pump.fun
- [x] 一键转录
- [x] 一键清理缓存
- [ ] 支持导出剪映草稿
- [ ] 支持短剧解说
- [X] 支持短剧解说
- [ ] 主角人脸匹配
- [ ] 支持根据口播,文案,视频素材自动匹配
- [ ] 支持更多 TTS 引擎
- [ ] ...
## 配置要求 📦
- 建议最低 CPU 4核或以上内存 8G 或以上,显卡非必须
- Windows 10 或 MacOS 11.0 以上系统
- [Python 3.10+](https://www.python.org/downloads/)
- Windows 10/11 或 MacOS 11.0 以上系统
- [Python 3.12+](https://www.python.org/downloads/)
## 反馈建议 📢

View File

@ -13,6 +13,7 @@ from app.config import config
from app.models.exception import HttpException
from app.router import root_api_router
from app.utils import utils
from app.utils import ffmpeg_utils
def exception_handler(request: Request, e: HttpException):
@ -80,3 +81,10 @@ def shutdown_event():
@app.on_event("startup")
def startup_event():
logger.info("startup event")
# 检测FFmpeg硬件加速
hwaccel_info = ffmpeg_utils.detect_hardware_acceleration()
if hwaccel_info["available"]:
logger.info(f"FFmpeg硬件加速检测结果: 可用 | 类型: {hwaccel_info['type']} | 编码器: {hwaccel_info['encoder']} | 独立显卡: {hwaccel_info['is_dedicated_gpu']} | 参数: {hwaccel_info['hwaccel_args']}")
else:
logger.warning(f"FFmpeg硬件加速不可用: {hwaccel_info['message']}, 将使用CPU软件编码")

View File

@ -6,6 +6,19 @@ from loguru import logger
root_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
config_file = f"{root_dir}/config.toml"
version_file = f"{root_dir}/project_version"
def get_version_from_file():
    """Read the project version from the module-level `project_version` file.

    Returns:
        str: The stripped contents of the version file, or the fallback
        "0.1.0" when the file is missing or cannot be read.
    """
    fallback_version = "0.1.0"  # 默认版本号
    try:
        # Guard clause: missing file means we fall back to the default.
        if not os.path.isfile(version_file):
            return fallback_version
        with open(version_file, "r", encoding="utf-8") as fh:
            return fh.read().strip()
    except Exception as e:
        # Any read/permission error is logged and the default is used.
        logger.error(f"读取版本号文件失败: {str(e)}")
        return fallback_version
def load_config():
@ -57,7 +70,8 @@ project_description = _cfg.get(
"project_description",
"<a href='https://github.com/linyqh/NarratoAI'>https://github.com/linyqh/NarratoAI</a>",
)
project_version = _cfg.get("app", {}).get("project_version")
# 从文件读取版本号,而不是从配置文件中获取
project_version = get_version_from_file()
reload_debug = False
imagemagick_path = app.get("imagemagick_path", "")

View File

@ -1,6 +1,6 @@
import warnings
from enum import Enum
from typing import Any, List, Optional
from typing import Any, List, Optional, Union
import pydantic
from pydantic import BaseModel, Field
@ -13,6 +13,24 @@ warnings.filterwarnings(
)
class AudioVolumeDefaults:
    """Shared default audio-volume constants, kept in one place for global
    consistency across request models (音量配置默认值常量类)."""

    # Default narration / TTS speech volume.
    VOICE_VOLUME = 1.0
    TTS_VOLUME = 1.0

    # Default original-footage volume (the key value of the volume bug fix).
    ORIGINAL_VOLUME = 0.7

    # Default background-music volume.
    BGM_VOLUME = 0.3

    # Accepted volume range bounds.
    MIN_VOLUME = 0.0
    MAX_VOLUME = 1.0
class VideoConcatMode(str, Enum):
random = "random"
sequential = "sequential"
@ -20,7 +38,9 @@ class VideoConcatMode(str, Enum):
class VideoAspect(str, Enum):
landscape = "16:9"
landscape_2 = "4:3"
portrait = "9:16"
portrait_2 = "3:4"
square = "1:1"
def to_resolution(self):
@ -99,7 +119,7 @@ class VideoParams(BaseModel):
video_subject: str
video_script: str = "" # 用于生成视频的脚本
video_terms: Optional[str | list] = None # 用于生成视频的关键词
video_terms: Optional[Union[str, list]] = None # 用于生成视频的关键词
video_aspect: Optional[VideoAspect] = VideoAspect.portrait.value
video_concat_mode: Optional[VideoConcatMode] = VideoConcatMode.random.value
video_clip_duration: Optional[int] = 5
@ -111,11 +131,11 @@ class VideoParams(BaseModel):
video_language: Optional[str] = "" # auto detect
voice_name: Optional[str] = ""
voice_volume: Optional[float] = 1.0
voice_volume: Optional[float] = AudioVolumeDefaults.VOICE_VOLUME
voice_rate: Optional[float] = 1.0
bgm_type: Optional[str] = "random"
bgm_file: Optional[str] = ""
bgm_volume: Optional[float] = 0.2
bgm_volume: Optional[float] = AudioVolumeDefaults.BGM_VOLUME
subtitle_enabled: Optional[bool] = True
subtitle_position: Optional[str] = "bottom" # top, bottom, center
@ -155,11 +175,11 @@ class AudioRequest(BaseModel):
video_script: str
video_language: Optional[str] = ""
voice_name: Optional[str] = "zh-CN-XiaoxiaoNeural-Female"
voice_volume: Optional[float] = 1.0
voice_volume: Optional[float] = AudioVolumeDefaults.VOICE_VOLUME
voice_rate: Optional[float] = 1.2
bgm_type: Optional[str] = "random"
bgm_file: Optional[str] = ""
bgm_volume: Optional[float] = 0.2
bgm_volume: Optional[float] = AudioVolumeDefaults.BGM_VOLUME
video_source: Optional[str] = "local"
@ -345,7 +365,7 @@ class VideoClipParams(BaseModel):
# video_concat_mode: Optional[VideoConcatMode] = VideoConcatMode.random.value
voice_name: Optional[str] = Field(default="zh-CN-YunjianNeural", description="语音名称")
voice_volume: Optional[float] = Field(default=1.0, description="解说语音音量")
voice_volume: Optional[float] = Field(default=AudioVolumeDefaults.VOICE_VOLUME, description="解说语音音量")
voice_rate: Optional[float] = Field(default=1.0, description="语速")
voice_pitch: Optional[float] = Field(default=1.0, description="语调")
@ -360,13 +380,14 @@ class VideoClipParams(BaseModel):
text_back_color: Optional[str] = None # 文本背景色
stroke_color: str = "black" # 描边颜色
stroke_width: float = 1.5 # 描边宽度
subtitle_position: str = "bottom" # top, bottom, center, custom
subtitle_position: str = "bottom" # top, bottom, center, custom
custom_position: float = 70.0 # 自定义位置
n_threads: Optional[int] = Field(default=16, description="解说语音音量") # 线程<E7BABF><E7A88B><EFBFBD>,有助于提升视频处理速度
n_threads: Optional[int] = Field(default=16, description="线程数") # 线程数,有助于提升视频处理速度
tts_volume: Optional[float] = Field(default=1.0, description="解说语音音量(后处理)")
original_volume: Optional[float] = Field(default=1.0, description="视频原声音量")
bgm_volume: Optional[float] = Field(default=0.6, description="背景音乐音量")
tts_volume: Optional[float] = Field(default=AudioVolumeDefaults.TTS_VOLUME, description="解说语音音量(后处理)")
original_volume: Optional[float] = Field(default=AudioVolumeDefaults.ORIGINAL_VOLUME, description="视频原声音量")
bgm_volume: Optional[float] = Field(default=AudioVolumeDefaults.BGM_VOLUME, description="背景音乐音量")
class VideoTranscriptionRequest(BaseModel):

View File

@ -6,6 +6,7 @@ class GenerateScriptRequest(BaseModel):
video_path: str
video_theme: Optional[str] = ""
custom_prompt: Optional[str] = ""
frame_interval_input: Optional[int] = 5
skip_seconds: Optional[int] = 0
threshold: Optional[int] = 30
vision_batch_size: Optional[int] = 5

View File

@ -0,0 +1,97 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
'''
@Project: NarratoAI
@File : prompt
@Author : 小林同学
@Date : 2025/5/9 上午12:57
'''
# 字幕剧情分析提示词
subtitle_plot_analysis_v1 = """
# 角色
你是一位专业的剧本分析师和剧情概括助手
# 任务
我将为你提供一部短剧的完整字幕文本请你基于这些字幕完成以下任务
1. **整体剧情分析**简要概括整个短剧的核心剧情脉络主要冲突和结局如果有的话
2. **分段剧情解析与时间戳定位**
* 将整个短剧划分为若干个关键的剧情段落例如开端发展转折高潮结局或根据具体情节自然划分
* 段落数应该与字幕长度成正比
* 对于每一个剧情段落
* **概括该段落的主要内容**用简洁的语言描述这段剧情发生了什么
* **标注对应的时间戳范围**明确指出该剧情段落对应的开始字幕时间戳和结束字幕时间戳请直接从字幕中提取时间信息
# 输入格式
字幕内容通常包含时间戳和对话例如
```
00:00:05,000 --> 00:00:10,000
[角色A]: 你好吗
00:00:10,500 --> 00:00:15,000
[角色B]: 我很好谢谢发生了一些有趣的事情
... (更多字幕内容) ...
```
我将把实际字幕粘贴在下方
# 输出格式要求
请按照以下格式清晰地呈现分析结果
**整体剧情概括**
[此处填写对整个短剧剧情的概括]
**分段剧情解析**
**剧情段落 1[段落主题/概括例如主角登场与背景介绍]**
* **时间戳** [开始时间戳] --> [结束时间戳]
* **内容概要** [对这段剧情的详细描述]
**剧情段落 2[段落主题/概括例如第一个冲突出现]**
* **时间戳** [开始时间戳] --> [结束时间戳]
* **内容概要** [对这段剧情的详细描述]
... (根据实际剧情段落数量继续) ...
**剧情段落 N[段落主题/概括例如结局与反思]**
* **时间戳** [开始时间戳] --> [结束时间戳]
* **内容概要** [对这段剧情的详细描述]
# 注意事项
* 请确保时间戳的准确性直接引用字幕中的时间
* 剧情段落的划分应合乎逻辑能够反映剧情的起承转合
* 语言表达应简洁准确客观
# 限制
1. 严禁输出与分析结果无关的内容
2.
# 请处理以下字幕:
"""
plot_writing = """
我是一个影视解说up主需要为我的粉丝讲解短剧%s的剧情目前正在解说剧情希望能让粉丝通过我的解说了解剧情并且产生 继续观看的兴趣请生成一篇解说脚本包含解说文案以及穿插原声的片段下面<plot>中的内容是短剧的剧情概述
<plot>
%s
</plot>
请使用 json 格式进行输出使用 <output> 中的输出格式
<output>
{
"items": [
{
"_id": 1, # 唯一递增id
"timestamp": "00:00:05,390-00:00:10,430",
"picture": "剧情描述或者备注",
"narration": "解说文案,如果片段为穿插的原片片段,可以直接使用 ‘播放原片+_id 进行占位",
"OST": "值为 0 表示当前片段为解说片段,值为 1 表示当前片段为穿插的原片"
}
}
</output>
<restriction>
1. 只输出 json 内容不要输出其他任何说明性的文字
2. 解说文案的语言使用 简体中文
3. 严禁虚构剧情所有画面只能从 <polt> 中摘取
4. 严禁虚构时间戳所有时间戳范围只能从 <polt> 中摘取
</restriction>
"""

View File

@ -0,0 +1,456 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
'''
@Project: NarratoAI
@File : 短剧解说
@Author : 小林同学
@Date : 2025/5/9 上午12:36
'''
import os
import json
import requests
from typing import Dict, Any, Optional
from loguru import logger
from app.config import config
from app.utils.utils import get_uuid, storage_dir
from app.services.SDE.prompt import subtitle_plot_analysis_v1, plot_writing
class SubtitleAnalyzer:
"""字幕剧情分析器,负责分析字幕内容并提取关键剧情段落"""
def __init__(
self,
api_key: Optional[str] = None,
model: Optional[str] = None,
base_url: Optional[str] = None,
custom_prompt: Optional[str] = None,
temperature: Optional[float] = 1.0,
):
"""
初始化字幕分析器
Args:
api_key: API密钥如果不提供则从配置中读取
model: 模型名称如果不提供则从配置中读取
base_url: API基础URL如果不提供则从配置中读取或使用默认值
custom_prompt: 自定义提示词如果不提供则使用默认值
temperature: 模型温度
"""
# 使用传入的参数或从配置中获取
self.api_key = api_key
self.model = model
self.base_url = base_url
self.temperature = temperature
# 设置提示词模板
self.prompt_template = custom_prompt or subtitle_plot_analysis_v1
# 初始化HTTP请求所需的头信息
self._init_headers()
def _init_headers(self):
"""初始化HTTP请求头"""
try:
# 基础请求头包含API密钥和内容类型
self.headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer {self.api_key}"
}
# logger.debug(f"初始化成功 - API Key: {self.api_key[:8]}... - Base URL: {self.base_url}")
except Exception as e:
logger.error(f"初始化请求头失败: {str(e)}")
raise
def analyze_subtitle(self, subtitle_content: str) -> Dict[str, Any]:
"""
分析字幕内容
Args:
subtitle_content: 字幕内容文本
Returns:
Dict[str, Any]: 包含分析结果的字典
"""
try:
# 构建完整提示词
prompt = f"{self.prompt_template}\n\n{subtitle_content}"
# 构建请求体数据
payload = {
"model": self.model,
"messages": [
{"role": "system", "content": "你是一位专业的剧本分析师和剧情概括助手。"},
{"role": "user", "content": prompt}
],
"temperature": self.temperature
}
# 构建请求地址
url = f"{self.base_url}/chat/completions"
# 发送HTTP请求
response = requests.post(url, headers=self.headers, json=payload)
# 解析响应
if response.status_code == 200:
response_data = response.json()
# 提取响应内容
if "choices" in response_data and len(response_data["choices"]) > 0:
analysis_result = response_data["choices"][0]["message"]["content"]
logger.debug(f"字幕分析完成消耗的tokens: {response_data.get('usage', {}).get('total_tokens', 0)}")
# 返回结果
return {
"status": "success",
"analysis": analysis_result,
"tokens_used": response_data.get("usage", {}).get("total_tokens", 0),
"model": self.model,
"temperature": self.temperature
}
else:
logger.error("字幕分析失败: 未获取到有效响应")
return {
"status": "error",
"message": "未获取到有效响应",
"temperature": self.temperature
}
else:
error_msg = f"请求失败,状态码: {response.status_code}, 响应: {response.text}"
logger.error(error_msg)
return {
"status": "error",
"message": error_msg,
"temperature": self.temperature
}
except Exception as e:
logger.error(f"字幕分析过程中发生错误: {str(e)}")
return {
"status": "error",
"message": str(e),
"temperature": self.temperature
}
def analyze_subtitle_from_file(self, subtitle_file_path: str) -> Dict[str, Any]:
"""
从文件读取字幕并分析
Args:
subtitle_file_path: 字幕文件的路径
Returns:
Dict[str, Any]: 包含分析结果的字典
"""
try:
# 检查文件是否存在
if not os.path.exists(subtitle_file_path):
return {
"status": "error",
"message": f"字幕文件不存在: {subtitle_file_path}",
"temperature": self.temperature
}
# 读取文件内容
with open(subtitle_file_path, 'r', encoding='utf-8') as f:
subtitle_content = f.read()
# 分析字幕
return self.analyze_subtitle(subtitle_content)
except Exception as e:
logger.error(f"从文件读取字幕并分析过程中发生错误: {str(e)}")
return {
"status": "error",
"message": str(e),
"temperature": self.temperature
}
def save_analysis_result(self, analysis_result: Dict[str, Any], output_path: Optional[str] = None) -> str:
"""
保存分析结果到文件
Args:
analysis_result: 分析结果
output_path: 输出文件路径如果不提供则自动生成
Returns:
str: 输出文件的路径
"""
try:
# 如果未提供输出路径,则自动生成
if not output_path:
output_dir = storage_dir("drama_analysis", create=True)
output_path = os.path.join(output_dir, f"analysis_{get_uuid(True)}.txt")
# 确保目录存在
os.makedirs(os.path.dirname(output_path), exist_ok=True)
# 保存结果
with open(output_path, 'w', encoding='utf-8') as f:
if analysis_result["status"] == "success":
f.write(analysis_result["analysis"])
else:
f.write(f"分析失败: {analysis_result['message']}")
logger.info(f"分析结果已保存到: {output_path}")
return output_path
except Exception as e:
logger.error(f"保存分析结果时发生错误: {str(e)}")
return ""
def generate_narration_script(self, short_name:str, plot_analysis: str, temperature: float = 0.7) -> Dict[str, Any]:
"""
根据剧情分析生成解说文案
Args:
short_name: 短剧名称
plot_analysis: 剧情分析内容
temperature: 生成温度控制创造性默认0.7
Returns:
Dict[str, Any]: 包含生成结果的字典
"""
try:
# 构建完整提示词
prompt = plot_writing % (short_name, plot_analysis)
# 构建请求体数据
payload = {
"model": self.model,
"messages": [
{"role": "system", "content": "你是一位专业的短视频解说脚本撰写专家。"},
{"role": "user", "content": prompt}
],
"temperature": temperature
}
# 对特定模型添加响应格式设置
if self.model not in ["deepseek-reasoner"]:
payload["response_format"] = {"type": "json_object"}
# 构建请求地址
url = f"{self.base_url}/chat/completions"
# 发送HTTP请求
response = requests.post(url, headers=self.headers, json=payload)
# 解析响应
if response.status_code == 200:
response_data = response.json()
# 提取响应内容
if "choices" in response_data and len(response_data["choices"]) > 0:
narration_script = response_data["choices"][0]["message"]["content"]
logger.debug(f"解说文案生成完成消耗的tokens: {response_data.get('usage', {}).get('total_tokens', 0)}")
# 返回结果
return {
"status": "success",
"narration_script": narration_script,
"tokens_used": response_data.get("usage", {}).get("total_tokens", 0),
"model": self.model,
"temperature": self.temperature
}
else:
logger.error("解说文案生成失败: 未获取到有效响应")
return {
"status": "error",
"message": "未获取到有效响应",
"temperature": self.temperature
}
else:
error_msg = f"请求失败,状态码: {response.status_code}, 响应: {response.text}"
logger.error(error_msg)
return {
"status": "error",
"message": error_msg,
"temperature": self.temperature
}
except Exception as e:
logger.error(f"解说文案生成过程中发生错误: {str(e)}")
return {
"status": "error",
"message": str(e),
"temperature": self.temperature
}
def save_narration_script(self, narration_result: Dict[str, Any], output_path: Optional[str] = None) -> str:
"""
保存解说文案到文件
Args:
narration_result: 解说文案生成结果
output_path: 输出文件路径如果不提供则自动生成
Returns:
str: 输出文件的路径
"""
try:
# 如果未提供输出路径,则自动生成
if not output_path:
output_dir = storage_dir("narration_scripts", create=True)
output_path = os.path.join(output_dir, f"narration_{get_uuid(True)}.json")
# 确保目录存在
os.makedirs(os.path.dirname(output_path), exist_ok=True)
# 保存结果
with open(output_path, 'w', encoding='utf-8') as f:
if narration_result["status"] == "success":
f.write(narration_result["narration_script"])
else:
f.write(f"生成失败: {narration_result['message']}")
logger.info(f"解说文案已保存到: {output_path}")
return output_path
except Exception as e:
logger.error(f"保存解说文案时发生错误: {str(e)}")
return ""
def analyze_subtitle(
    subtitle_content: str = None,
    subtitle_file_path: str = None,
    api_key: Optional[str] = None,
    model: Optional[str] = None,
    base_url: Optional[str] = None,
    custom_prompt: Optional[str] = None,
    temperature: float = 1.0,
    save_result: bool = False,
    output_path: Optional[str] = None
) -> Dict[str, Any]:
    """Convenience wrapper: analyse subtitle plot via a SubtitleAnalyzer.

    Exactly one of `subtitle_content` / `subtitle_file_path` should be given;
    when both are provided, `subtitle_content` takes precedence.

    Args:
        subtitle_content: raw subtitle text to analyse
        subtitle_file_path: path of a subtitle file to analyse
        api_key: LLM API key
        model: LLM model name
        base_url: API base URL (OpenAI-compatible endpoint)
        custom_prompt: overrides the default analysis prompt template
        temperature: sampling temperature passed to the model
        save_result: when True, persist a successful analysis to disk
        output_path: target file for the saved result; auto-generated when None

    Returns:
        Dict[str, Any]: dict with "status" ("success"/"error") and either
        "analysis" (plus "output_path" when saved) or "message" on failure
    """
    # Build an analyzer configured for this single request
    analyzer = SubtitleAnalyzer(
        temperature=temperature,
        api_key=api_key,
        model=model,
        base_url=base_url,
        custom_prompt=custom_prompt
    )
    logger.debug(f"使用模型: {analyzer.model} 开始分析, 温度: {analyzer.temperature}")
    # Prefer in-memory content; fall back to reading from a file path
    if subtitle_content:
        result = analyzer.analyze_subtitle(subtitle_content)
    elif subtitle_file_path:
        result = analyzer.analyze_subtitle_from_file(subtitle_file_path)
    else:
        # Neither input supplied — report the error in the standard shape
        return {
            "status": "error",
            "message": "必须提供字幕内容或字幕文件路径",
            "temperature": temperature
        }
    # Optionally persist successful results to disk
    if save_result and result["status"] == "success":
        result["output_path"] = analyzer.save_analysis_result(result, output_path)
    return result
def generate_narration_script(
    short_name: str = None,
    plot_analysis: str = None,
    api_key: Optional[str] = None,
    model: Optional[str] = None,
    base_url: Optional[str] = None,
    temperature: float = 1.0,
    save_result: bool = False,
    output_path: Optional[str] = None
) -> Dict[str, Any]:
    """Convenience wrapper: generate narration copy from a plot analysis.

    Args:
        short_name: title of the short drama
        plot_analysis: plot-analysis text (e.g. output of analyze_subtitle)
        api_key: LLM API key
        model: LLM model name
        base_url: API base URL (OpenAI-compatible endpoint)
        temperature: sampling temperature for the generation call
        save_result: when True, persist a successful script to disk
        output_path: target file for the saved script; auto-generated when None

    Returns:
        Dict[str, Any]: dict with "status" ("success"/"error") and either
        "narration_script" (plus "output_path" when saved) or "message"
    """
    # Build an analyzer configured for this single request
    analyzer = SubtitleAnalyzer(
        temperature=temperature,
        api_key=api_key,
        model=model,
        base_url=base_url
    )
    # Generate the narration script from the analysis text
    result = analyzer.generate_narration_script(short_name, plot_analysis, temperature)
    # Optionally persist successful results to disk
    if save_result and result["status"] == "success":
        result["output_path"] = analyzer.save_narration_script(result, output_path)
    return result
if __name__ == '__main__':
text_api_key = "skxxxx"
text_model = "gemini-2.0-flash"
text_base_url = "https://api.narratoai.cn/v1/chat/completions" # 确保URL不以斜杠结尾便于后续拼接
subtitle_path = "/Users/apple/Desktop/home/NarratoAI/resource/srt/家里家外1-5.srt"
# 示例用法
if subtitle_path:
# 分析字幕总结剧情
analysis_result = analyze_subtitle(
subtitle_file_path=subtitle_path,
api_key=text_api_key,
model=text_model,
base_url=text_base_url,
save_result=True
)
if analysis_result["status"] == "success":
print("字幕分析成功!")
print("分析结果:")
print(analysis_result["analysis"])
# 根据剧情生成解说文案
narration_result = generate_narration_script(
plot_analysis=analysis_result["analysis"],
api_key=text_api_key,
model=text_model,
base_url=text_base_url,
save_result=True
)
if narration_result["status"] == "success":
print("\n解说文案生成成功!")
print("解说文案:")
print(narration_result["narration_script"])
else:
print(f"\n解说文案生成失败: {narration_result['message']}")
else:
print(f"分析失败: {analysis_result['message']}")

View File

@ -0,0 +1,37 @@
"""
视频脚本生成pipeline串联各个处理步骤
"""
import os
from .utils.step1_subtitle_analyzer_openai import analyze_subtitle
from .utils.step5_merge_script import merge_script
def generate_script(srt_path: str, api_key: str, model_name: str, output_path: str, base_url: str = None, custom_clips: int = 5):
    """Generate a video mash-up script from an SRT subtitle file.

    Pipeline: analyse the subtitles with an LLM to locate key plot points,
    then merge those points into the final JSON script written to disk.

    Args:
        srt_path: path to the subtitle (.srt) file
        api_key: LLM API key passed through to the subtitle analyzer
        model_name: LLM model name used for the analysis
        output_path: path where the merged JSON script is written
        base_url: optional API base URL (OpenAI-compatible endpoint)
        custom_clips: number of key clips to extract (default 5)

    Returns:
        list: the final merged script items

    Raises:
        FileNotFoundError: when the subtitle file does not exist
    """
    # Validate the input file before making any LLM calls
    if not os.path.exists(srt_path):
        raise FileNotFoundError(f"字幕文件不存在: {srt_path}")
    # Analyse subtitles: returns a plot summary and located plot points
    print("开始分析...")
    openai_analysis = analyze_subtitle(
        srt_path=srt_path,
        api_key=api_key,
        model_name=model_name,
        base_url=base_url,
        custom_clips=custom_clips
    )
    # Merge the located plot points into the final script file
    adjusted_results = openai_analysis['plot_points']
    final_script = merge_script(adjusted_results, output_path)
    return final_script

View File

@ -0,0 +1,60 @@
"""
定义项目中使用的数据类型
"""
from typing import List, Dict, Optional
from dataclasses import dataclass
@dataclass
class PlotPoint:
    """A key plot moment located in the subtitle timeline."""
    timestamp: str  # "start-end" range, e.g. "00:01:15,000-00:04:40,000" — format per the analyzer prompt
    title: str      # short theme/title of the plot point
    picture: str    # description of the plot around this moment
@dataclass
class Commentary:
    """Narration attached to a time range of the video."""
    timestamp: str   # "start-end" time range the commentary covers
    title: str       # theme of the commented segment
    copywriter: str  # presumably the narration copy text — confirm against the producing code
@dataclass
class SubtitleSegment:
    """One parsed subtitle entry."""
    start_time: float  # segment start, in seconds
    end_time: float    # segment end, in seconds
    text: str          # subtitle text content
@dataclass
class ScriptItem:
    """One entry of the final generated script."""
    timestamp: str   # "start-end" time range of the clip
    title: str       # theme of the clip
    picture: str     # plot/scene description
    copywriter: str  # presumably the narration copy for this clip — confirm against the producing code
@dataclass
class PipelineResult:
    """Aggregate output of the script-generation pipeline."""
    output_video_path: str                    # path of the produced video
    plot_points: List[PlotPoint]              # key plot moments found in the subtitles
    subtitle_segments: List[SubtitleSegment]  # parsed subtitle entries
    commentaries: List[Commentary]            # generated narration pieces
    final_script: List[ScriptItem]            # merged final script items
    error: Optional[str] = None               # error message when a stage failed, else None
class VideoProcessingError(Exception):
    """Raised when a video-processing step fails."""
    pass
class SubtitleProcessingError(Exception):
    """Raised when subtitle loading or parsing fails."""
    pass
class PlotAnalysisError(Exception):
    """Raised when plot analysis fails."""
    pass
class CopywritingError(Exception):
    """Raised when narration copywriting fails."""
    pass

View File

@ -0,0 +1,157 @@
"""
使用OpenAI API分析字幕文件返回剧情梗概和爆点
"""
import traceback
from openai import OpenAI, BadRequestError
import os
import json
from .utils import load_srt
def _chat_completion_json(client, model_name: str, messages: list) -> dict:
    """Call the chat-completions API and parse the reply as a JSON object.

    Tries `response_format={"type": "json_object"}` first; models that do not
    support it (e.g. DeepSeek R1/V3) raise BadRequestError, in which case the
    request is retried without it and any ```json fences are stripped.

    Args:
        client: configured OpenAI-compatible client
        model_name: model identifier to use
        messages: chat messages payload

    Returns:
        dict: the parsed JSON object returned by the model

    Raises:
        Exception: when the request or the JSON parsing fails
    """
    try:
        completion = client.chat.completions.create(
            model=model_name,
            messages=messages,
            response_format={"type": "json_object"}
        )
        return json.loads(completion.choices[0].message.content)
    except BadRequestError:
        # Retry without response_format for models that reject it
        completion = client.chat.completions.create(
            model=model_name,
            messages=messages
        )
        # Strip leading ```json and trailing ``` fences before parsing
        content = completion.choices[0].message.content.replace("```json", "").replace("```", "")
        return json.loads(content)
    except Exception as e:
        raise Exception(f"大模型解析发生错误:{str(e)}\n{traceback.format_exc()}")


def analyze_subtitle(
        srt_path: str,
        model_name: str,
        api_key: str = None,
        base_url: str = None,
        custom_clips: int = 5
) -> dict:
    """Analyse an SRT subtitle file and return the plot summary plus key moments.

    Args:
        srt_path: path to the SRT subtitle file
        model_name: LLM model name; names containing "deepseek" are routed to
            the SiliconFlow endpoint
        api_key: API key; falls back to the DeepSeek_API_KEY / OPENAI_API_KEY
            environment variables when None
        base_url: API base URL for non-DeepSeek models
        custom_clips: number of key clips to ask the model for (default 5)

    Returns:
        dict: {"plot_summary": <summary dict>, "plot_points": [<point>, ...]}

    Raises:
        Exception: when subtitle loading or either model call fails
    """
    try:
        # Load subtitles and flatten them to "timestamp\ntext" lines
        subtitles = load_srt(srt_path)
        subtitle_content = "\n".join([f"{sub['timestamp']}\n{sub['text']}" for sub in subtitles])

        # Build the client locally; the previous implementation mutated a
        # module-level `global client`, leaking state between calls.
        if "deepseek" in model_name.lower():
            client = OpenAI(
                api_key=api_key or os.getenv('DeepSeek_API_KEY'),
                base_url="https://api.siliconflow.cn/v1"  # third-party SiliconFlow API
            )
        else:
            client = OpenAI(
                api_key=api_key or os.getenv('OPENAI_API_KEY'),
                base_url=base_url
            )

        # --- Pass 1: overall summary and key plot titles ---
        messages = [
            {
                "role": "system",
                "content": """你是一名经验丰富的短剧编剧,擅长根据字幕内容按照先后顺序分析关键剧情,并找出 %s 个关键片段。
请返回一个JSON对象,包含以下字段:
{
    "summary": "整体剧情梗概",
    "plot_titles": [
        "关键剧情1",
        "关键剧情2",
        "关键剧情3",
        "关键剧情4",
        "关键剧情5",
        "..."
    ]
}
请确保返回的是合法的JSON格式, 请确保返回的是 %s 个片段
""" % (custom_clips, custom_clips)
            },
            {
                "role": "user",
                "content": f"srt字幕如下:{subtitle_content}"
            }
        ]
        summary_data = _chat_completion_json(client, model_name, messages)
        print(json.dumps(summary_data, indent=4, ensure_ascii=False))

        # --- Pass 2: locate each plot point in the subtitle timeline ---
        prompt = f"""剧情梗概:
{summary_data['summary']}

需要定位的爆点内容:
"""
        print(f"找到 {len(summary_data['plot_titles'])} 个片段")
        for i, point in enumerate(summary_data['plot_titles'], 1):
            prompt += f"{i}. {point}\n"

        messages = [
            {
                "role": "system",
                "content": """你是一名短剧编剧,非常擅长根据字幕中分析视频中关键剧情出现的具体时间段。
请仔细阅读剧情梗概和爆点内容,然后在字幕中找出每个爆点发生的具体时间段和爆点前后的详细剧情。
请返回一个JSON对象,包含一个名为"plot_points"的数组,数组中包含多个对象,每个对象都要包含以下字段:
{
    "plot_points": [
        {
            "timestamp": "时间段,格式为xx:xx:xx,xxx-xx:xx:xx,xxx",
            "title": "关键剧情的主题",
            "picture": "关键剧情前后的详细剧情描述"
        }
    ]
}
请确保返回的是合法的JSON格式"""
            },
            {
                "role": "user",
                "content": f"""字幕内容:
{subtitle_content}

{prompt}"""
            }
        ]
        plot_points_data = _chat_completion_json(client, model_name, messages)
        print(json.dumps(plot_points_data, indent=4, ensure_ascii=False))

        # Combine both passes into a single result
        return {
            "plot_summary": summary_data,
            "plot_points": plot_points_data["plot_points"]
        }
    except Exception as e:
        raise Exception(f"分析字幕时发生错误:{str(e)}\n{traceback.format_exc()}")

View File

@ -0,0 +1,69 @@
"""
合并生成最终脚本
"""
import os
import json
from typing import List, Dict, Tuple
def merge_script(
    plot_points: List[Dict],
    output_path: str
):
    """Merge plot points into the final script and write it to disk as JSON.

    Fixes over the previous version: the unused `format_timestamp` helper and
    the discarded parse results were removed; timestamps are still validated
    so malformed entries fail loudly instead of propagating.

    Args:
        plot_points: plot-point dicts, each with "timestamp"
            ("HH:MM:SS[,sss]-HH:MM:SS[,sss]") and "picture" keys
        output_path: path of the JSON file to write

    Returns:
        list: the final script items written to `output_path`

    Raises:
        ValueError: when a plot point carries a malformed timestamp
    """
    def parse_timestamp(ts: str) -> Tuple[float, float]:
        """Parse a 'start-end' timestamp into (start_seconds, end_seconds)."""
        start, end = ts.split('-')

        def parse_time(time_str: str) -> float:
            time_str = time_str.strip()
            if ',' in time_str:
                time_parts, ms_part = time_str.split(',')
                ms = float(ms_part) / 1000
            else:
                time_parts = time_str
                ms = 0
            hours, minutes, seconds = map(int, time_parts.split(':'))
            return hours * 3600 + minutes * 60 + seconds + ms

        return parse_time(start), parse_time(end)

    final_script = []
    for number, plot_point in enumerate(plot_points, start=1):
        # Validate the timestamp format early (raises ValueError on bad input)
        parse_timestamp(plot_point["timestamp"])
        final_script.append({
            "_id": number,
            "timestamp": plot_point["timestamp"],
            "picture": plot_point["picture"],
            # Random hex suffix keeps placeholder narration strings unique
            "narration": f"播放原生_{os.urandom(4).hex()}",
            "OST": 1,  # OST=1: keep the original clip's own audio
        })

    # Persist the merged script
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(final_script, f, ensure_ascii=False, indent=4)
    print(f"脚本生成完成:{output_path}")

    return final_script

View File

@ -0,0 +1,45 @@
# 公共方法
import json
import requests # 新增
from typing import List, Dict
def load_srt(file_path: str) -> List[Dict]:
    """Load and parse an SRT subtitle file.

    Fix over the previous version: blocks were split on a literal '\\n\\n',
    which broke on CRLF files and on multiple blank lines between entries;
    splitting now tolerates both.

    Args:
        file_path: path to the .srt file

    Returns:
        List[Dict]: one dict per subtitle block with keys 'number',
        'timestamp', 'text', 'start_time', 'end_time'; malformed blocks are
        skipped with a warning.
    """
    import re

    with open(file_path, 'r', encoding='utf-8-sig') as f:
        content = f.read().strip()

    # Split on one-or-more blank lines; tolerates \r\n endings and extra
    # blank lines between subtitle blocks.
    subtitle_blocks = [b for b in re.split(r'(?:\r?\n){2,}', content) if b.strip()]

    subtitles = []
    for block in subtitle_blocks:
        lines = [line.rstrip('\r') for line in block.split('\n')]
        if len(lines) >= 3:  # block needs index, timestamp and at least one text line
            try:
                number = int(lines[0].strip())
                timestamp = lines[1]
                text = ' '.join(lines[2:])
                # Split "start --> end" into its two parts
                start_time, end_time = timestamp.split(' --> ')
                subtitles.append({
                    'number': number,
                    'timestamp': timestamp,
                    'text': text,
                    'start_time': start_time,
                    'end_time': end_time
                })
            except ValueError as e:
                print(f"Warning: 跳过无效的字幕块: {e}")
                continue

    return subtitles

Binary file not shown.

Binary file not shown.

View File

@ -18,15 +18,14 @@ def check_ffmpeg():
return False
def merge_audio_files(task_id: str, audio_files: list, total_duration: float, list_script: list):
def merge_audio_files(task_id: str, total_duration: float, list_script: list):
"""
合并音频文件根据OST设置处理不同的音频轨道
合并音频文件
Args:
task_id: 任务ID
audio_files: TTS生成的音频文件列表
total_duration: 总时长
list_script: 完整脚本信息包含OST设置
list_script: 完整脚本信息包含duration时长和audio路径
Returns:
str: 合并后的音频文件路径
@ -39,36 +38,38 @@ def merge_audio_files(task_id: str, audio_files: list, total_duration: float, li
# 创建一个空的音频片段
final_audio = AudioSegment.silent(duration=total_duration * 1000) # 总时长以毫秒为单位
# 计算每个片段的开始位置基于duration字段
current_position = 0 # 初始位置(秒)
# 遍历脚本中的每个片段
for segment, audio_file in zip(list_script, audio_files):
for segment in list_script:
try:
# 加载TTS音频文件
tts_audio = AudioSegment.from_file(audio_file)
# 获取片段的开始和结束时间
start_time, end_time = segment['new_timestamp'].split('-')
start_seconds = utils.time_to_seconds(start_time)
end_seconds = utils.time_to_seconds(end_time)
# 根据OST设置处理音频
if segment['OST'] == 0:
# 只使用TTS音频
final_audio = final_audio.overlay(tts_audio, position=start_seconds * 1000)
elif segment['OST'] == 1:
# 只使用原声(假设原声已经在视频中)
continue
elif segment['OST'] == 2:
# 混合TTS音频和原声
original_audio = AudioSegment.silent(duration=(end_seconds - start_seconds) * 1000)
mixed_audio = original_audio.overlay(tts_audio)
final_audio = final_audio.overlay(mixed_audio, position=start_seconds * 1000)
# 获取片段时长(秒)
duration = segment['duration']
# 检查audio字段是否为空
if segment['audio'] and os.path.exists(segment['audio']):
# 加载TTS音频文件
tts_audio = AudioSegment.from_file(segment['audio'])
# 将TTS音频添加到最终音频
final_audio = final_audio.overlay(tts_audio, position=current_position * 1000)
else:
# audio为空不添加音频仅保留间隔
logger.info(f"片段 {segment.get('timestamp', '')} 没有音频文件,保留 {duration} 秒的间隔")
# 更新下一个片段的开始位置
current_position += duration
except Exception as e:
logger.error(f"处理音频文件 {audio_file} 时出错: {str(e)}")
logger.error(f"处理音频片段时出错: {str(e)}")
# 即使处理失败,也要更新位置,确保后续片段位置正确
if 'duration' in segment:
current_position += segment['duration']
continue
# 保存合并后的音频文件
output_audio_path = os.path.join(utils.task_dir(task_id), "final_audio.mp3")
output_audio_path = os.path.join(utils.task_dir(task_id), "merger_audio.mp3")
final_audio.export(output_audio_path, format="mp3")
logger.info(f"合并后的音频文件已保存: {output_audio_path}")
@ -93,7 +94,7 @@ def time_to_seconds(time_str):
# 分割时间部分
parts = time_part.split(':')
if len(parts) == 3: # HH:MM:SS
h, m, s = map(int, parts)
seconds = h * 3600 + m * 60 + s
@ -118,11 +119,11 @@ def extract_timestamp(filename):
# 从文件名中提取时间部分
time_part = filename.split('_', 1)[1].split('.')[0] # 获取 "00_06,500-00_24,800" 部分
start_time, end_time = time_part.split('-') # 分割成开始和结束时间
# 将下划线格式转换回冒号格式
start_time = start_time.replace('_', ':')
end_time = end_time.replace('_', ':')
# 将时间戳转换为秒
start_seconds = time_to_seconds(start_time)
end_seconds = time_to_seconds(end_time)
@ -135,17 +136,36 @@ def extract_timestamp(filename):
if __name__ == "__main__":
# 示例用法
audio_files =[
"/Users/apple/Desktop/home/NarratoAI/storage/tasks/test456/audio_00:06-00:24.mp3",
"/Users/apple/Desktop/home/NarratoAI/storage/tasks/test456/audio_00:32-00:38.mp3",
"/Users/apple/Desktop/home/NarratoAI/storage/tasks/test456/audio_00:43-00:52.mp3",
"/Users/apple/Desktop/home/NarratoAI/storage/tasks/test456/audio_00:52-01:09.mp3",
"/Users/apple/Desktop/home/NarratoAI/storage/tasks/test456/audio_01:13-01:15.mp3",
]
total_duration = 38
video_script_path = "/Users/apple/Desktop/home/NarratoAI/resource/scripts/test003.json"
with open(video_script_path, "r", encoding="utf-8") as f:
video_script = json.load(f)
total_duration = 90
output_file = merge_audio_files("test456", audio_files, total_duration, video_script)
video_script = [
{'picture': '【解说】好的,各位,欢迎回到我的频道!《庆余年 2》刚开播就给了我们一个王炸范闲在北齐""了?这怎么可能!',
'timestamp': '00:00:00-00:00:26',
'narration': '好的各位,欢迎回到我的频道!《庆余年 2》刚开播就给了我们一个王炸范闲在北齐""了?这怎么可能!上集片尾那个巨大的悬念,这一集就立刻揭晓了!范闲假死归来,他面临的第一个,也是最大的难关,就是如何面对他最敬爱的,同时也是最可怕的那个人——庆帝!',
'OST': 0, 'duration': 26,
'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_00_00-00_01_15.mp3'},
{'picture': '【解说】上一集我们看到,范闲在北齐遭遇了惊天变故,生死不明!', 'timestamp': '00:01:15-00:01:29',
'narration': '但我们都知道,他绝不可能就这么轻易退场!第二集一开场,范闲就已经秘密回到了京都。他的生死传闻,可不像我们想象中那样只是小范围流传,而是…',
'OST': 0, 'duration': 14,
'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_01_15-00_04_40.mp3'},
{'picture': '画面切到王启年小心翼翼地向范闲汇报。', 'timestamp': '00:04:41-00:04:58',
'narration': '我发现大人的死讯不光是在民间,在官场上也它传开了,所以呢,所以啊,可不是什么好事,将来您跟陛下怎么交代,这可是欺君之罪',
'OST': 1, 'duration': 17,
'audio': ''},
{'picture': '【解说】"欺君之罪"!在封建王朝,这可是抄家灭族的大罪!搁一般人,肯定脚底抹油溜之大吉了。',
'timestamp': '00:04:58-00:05:20',
'narration': '"欺君之罪"!在封建王朝,这可是抄家灭族的大罪!搁一般人,肯定脚底抹油溜之大吉了。但范闲是谁啊?他偏要反其道而行之!他竟然决定,直接去见庆帝!冒着天大的风险,用"假死"这个事实去赌庆帝的态度!',
'OST': 0, 'duration': 22,
'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_04_58-00_05_45.mp3'},
{'picture': '【解说】但想见庆帝,哪有那么容易?范闲艺高人胆大,竟然选择了最激进的方式——闯宫!',
'timestamp': '00:05:45-00:05:53',
'narration': '但想见庆帝,哪有那么容易?范闲艺高人胆大,竟然选择了最激进的方式——闯宫!',
'OST': 0, 'duration': 8,
'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_05_45-00_06_00.mp3'},
{'picture': '画面切换到范闲蒙面闯入皇宫,被侍卫包围的场景。', 'timestamp': '00:06:00-00:06:03',
'narration': '抓刺客',
'OST': 1, 'duration': 3,
'audio': ''}]
output_file = merge_audio_files("test456", total_duration, video_script)
print(output_file)

237
app/services/clip_video.py Normal file
View File

@ -0,0 +1,237 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
'''
@Project: NarratoAI
@File : clip_video
@Author : 小林同学
@Date : 2025/5/6 下午6:14
'''
import os
import subprocess
import json
import hashlib
from loguru import logger
from typing import Dict, List, Optional
from pathlib import Path
from app.utils import ffmpeg_utils
def parse_timestamp(timestamp: str) -> tuple:
    """
    Split a combined timestamp string into its start and end parts.

    Args:
        timestamp: string of the form 'HH:MM:SS-HH:MM:SS' or
            'HH:MM:SS,sss-HH:MM:SS,sss'

    Returns:
        tuple: (start, end) time strings, in the same format as the input
    """
    start_part, end_part = timestamp.split('-')
    return start_part, end_part
def calculate_end_time(start_time: str, duration: float, extra_seconds: float = 1.0) -> str:
    """
    Compute an end time from a start time plus a duration.

    Args:
        start_time: start time, 'HH:MM:SS' or 'HH:MM:SS,sss' (with milliseconds)
        duration: clip duration in seconds
        extra_seconds: safety margin added on top of the duration (default 1.0)

    Returns:
        str: end time in the same format as `start_time`
    """
    # Detect whether the input carries a millisecond component
    with_ms = ',' in start_time
    if with_ms:
        hms_text, ms_text = start_time.split(',')
        base_ms = int(ms_text)
    else:
        hms_text = start_time
        base_ms = 0
    h, m, s = map(int, hms_text.split(':'))

    # Work entirely in integer milliseconds to avoid float drift
    total_ms = (h * 3600 + m * 60 + s) * 1000 + base_ms + int((duration + extra_seconds) * 1000)

    total_s, ms_out = divmod(total_ms, 1000)
    h_out, rem = divmod(total_s, 3600)
    m_out, s_out = divmod(rem, 60)

    # Mirror the input format on output
    if with_ms:
        return f"{h_out:02d}:{m_out:02d}:{s_out:02d},{ms_out:03d}"
    return f"{h_out:02d}:{m_out:02d}:{s_out:02d}"
def check_hardware_acceleration() -> Optional[str]:
    """
    Detect the hardware-acceleration option supported by this system.

    Returns:
        Optional[str]: hardware-acceleration type name (e.g. "videotoolbox",
        as checked by clip_video), or None when no acceleration is available.
    """
    # Delegate to the centralised detection in ffmpeg_utils
    return ffmpeg_utils.get_ffmpeg_hwaccel_type()
def clip_video(
    video_origin_path: str,
    tts_result: List[Dict],
    output_dir: Optional[str] = None,
    task_id: Optional[str] = None
) -> Dict[str, str]:
    """
    Cut the source video into clips according to the timestamps in `tts_result`.

    Args:
        video_origin_path: path of the original video
        tts_result: list of dicts, each carrying a "timestamp" ("start-end")
            and a "duration" in seconds; an optional "_id" is used as the
            result key (falls back to the timestamp)
        output_dir: output directory; auto-generated under storage/temp/clip_video
            when None
        task_id: unique task id; derived from an MD5 of the inputs when None

    Returns:
        Dict[str, str]: mapping from each item's _id (or timestamp) to the
        path of the corresponding clipped video file

    Raises:
        FileNotFoundError: when the source video does not exist
        RuntimeError: when ffmpeg fails to cut a segment
    """
    # Ensure the source video exists before doing any work
    if not os.path.exists(video_origin_path):
        raise FileNotFoundError(f"视频文件不存在: {video_origin_path}")

    # Derive a deterministic task id from the inputs when none is given
    if task_id is None:
        content_for_hash = f"{video_origin_path}_{json.dumps(tts_result)}"
        task_id = hashlib.md5(content_for_hash.encode()).hexdigest()

    # Default output directory: <project root>/storage/temp/clip_video/<task_id>
    if output_dir is None:
        output_dir = os.path.join(
            os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
            "storage", "temp", "clip_video", task_id
        )

    # Make sure the output directory exists
    Path(output_dir).mkdir(parents=True, exist_ok=True)

    # Query hardware-acceleration support once for all clips
    hwaccel = check_hardware_acceleration()
    hwaccel_args = []
    if hwaccel:
        hwaccel_args = ffmpeg_utils.get_ffmpeg_hwaccel_args()

    # Mapping of segment id -> clipped file path
    result = {}

    for item in tts_result:
        _id = item.get("_id", item.get("timestamp", "unknown"))
        timestamp = item["timestamp"]
        start_time, _ = parse_timestamp(timestamp)

        # Real end = start + duration (+1s margin via calculate_end_time's default)
        duration = item["duration"]
        calculated_end_time = calculate_end_time(start_time, duration)

        # FFmpeg expects '.' as the millisecond separator, not ','
        ffmpeg_start_time = start_time.replace(',', '.')
        ffmpeg_end_time = calculated_end_time.replace(',', '.')

        # Build a filesystem-safe output name (':' and ',' replaced by '-')
        safe_start_time = start_time.replace(':', '-').replace(',', '-')
        safe_end_time = calculated_end_time.replace(':', '-').replace(',', '-')
        output_filename = f"vid_{safe_start_time}@{safe_end_time}.mp4"
        output_path = os.path.join(output_dir, output_filename)

        # Assemble the FFmpeg command; use the videotoolbox encoder when that
        # acceleration is available, otherwise software x264
        ffmpeg_cmd = [
            "ffmpeg", "-y", *hwaccel_args,
            "-i", video_origin_path,
            "-ss", ffmpeg_start_time,
            "-to", ffmpeg_end_time,
            "-c:v", "h264_videotoolbox" if hwaccel == "videotoolbox" else "libx264",
            "-c:a", "aac",
            "-strict", "experimental",
            output_path
        ]

        # Run FFmpeg for this segment
        try:
            logger.info(f"裁剪视频片段: {timestamp} -> {ffmpeg_start_time}{ffmpeg_end_time}")
            # logger.debug(f"执行命令: {' '.join(ffmpeg_cmd)}")

            # On Windows, force UTF-8 decoding of ffmpeg output to avoid GBK
            # codec errors when the console encoding differs
            is_windows = os.name == 'nt'
            if is_windows:
                process = subprocess.run(
                    ffmpeg_cmd,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    encoding='utf-8',  # decode ffmpeg output explicitly as UTF-8
                    text=True,
                    check=True
                )
            else:
                process = subprocess.run(
                    ffmpeg_cmd,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    text=True,
                    check=True
                )
            result[_id] = output_path
        except subprocess.CalledProcessError as e:
            logger.error(f"裁剪视频片段失败: {timestamp}")
            logger.error(f"错误信息: {e.stderr}")
            raise RuntimeError(f"视频裁剪失败: {e.stderr}")

    return result
if __name__ == "__main__":
video_origin_path = "/Users/apple/Desktop/home/NarratoAI/resource/videos/qyn2-2无片头片尾.mp4"
tts_result = [{'timestamp': '00:00:00-00:01:15',
'audio_file': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_00_00-00_01_15.mp3',
'subtitle_file': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_00_00-00_01_15.srt',
'duration': 25.55,
'text': '好的各位,欢迎回到我的频道!《庆余年 2》刚开播就给了我们一个王炸范闲在北齐""了?这怎么可能!上集片尾那个巨大的悬念,这一集就立刻揭晓了!范闲假死归来,他面临的第一个,也是最大的难关,就是如何面对他最敬爱的,同时也是最可怕的那个人——庆帝!'},
{'timestamp': '00:01:15-00:04:40',
'audio_file': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_01_15-00_04_40.mp3',
'subtitle_file': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_01_15-00_04_40.srt',
'duration': 13.488,
'text': '但我们都知道,他绝不可能就这么轻易退场!第二集一开场,范闲就已经秘密回到了京都。他的生死传闻,可不像我们想象中那样只是小范围流传,而是…'},
{'timestamp': '00:04:58-00:05:45',
'audio_file': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_04_58-00_05_45.mp3',
'subtitle_file': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_04_58-00_05_45.srt',
'duration': 21.363,
'text': '"欺君之罪"!在封建王朝,这可是抄家灭族的大罪!搁一般人,肯定脚底抹油溜之大吉了。但范闲是谁啊?他偏要反其道而行之!他竟然决定,直接去见庆帝!冒着天大的风险,用"假死"这个事实去赌庆帝的态度!'},
{'timestamp': '00:05:45-00:06:00',
'audio_file': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_05_45-00_06_00.mp3',
'subtitle_file': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_05_45-00_06_00.srt',
'duration': 7.675, 'text': '但想见庆帝,哪有那么容易?范闲艺高人胆大,竟然选择了最激进的方式——闯宫!'}]
subclip_path_videos = {
'00:00:00-00:01:15': '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/6e7e343c7592c7d6f9a9636b55000f23/vid-00-00-00-00-01-15.mp4',
'00:01:15-00:04:40': '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/6e7e343c7592c7d6f9a9636b55000f23/vid-00-01-15-00-04-40.mp4',
'00:04:41-00:04:58': '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/6e7e343c7592c7d6f9a9636b55000f23/vid-00-04-41-00-04-58.mp4',
'00:04:58-00:05:45': '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/6e7e343c7592c7d6f9a9636b55000f23/vid-00-04-58-00-05-45.mp4',
'00:05:45-00:06:00': '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/6e7e343c7592c7d6f9a9636b55000f23/vid-00-05-45-00-06-00.mp4',
'00:06:00-00:06:03': '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/6e7e343c7592c7d6f9a9636b55000f23/vid-00-06-00-00-06-03.mp4',
}
# 使用方法示例
try:
result = clip_video(video_origin_path, tts_result, subclip_path_videos)
print("裁剪结果:")
print(json.dumps(result, indent=4, ensure_ascii=False))
except Exception as e:
print(f"发生错误: {e}")

View File

@ -0,0 +1,264 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
'''
@Project: NarratoAI
@File : 生成介绍文案
@Author : 小林同学
@Date : 2025/5/8 上午11:33
'''
import json
import os
import traceback
from openai import OpenAI
from loguru import logger
def parse_frame_analysis_to_markdown(json_file_path):
    """
    Parse a video frame-analysis JSON file and render it as Markdown.

    :param json_file_path: path to the JSON file produced by frame analysis
    :return: Markdown-formatted string, or an error-message string on failure
    """
    # Bail out early with the exact error string callers expect.
    if not os.path.exists(json_file_path):
        return f"错误: 文件 {json_file_path} 不存在"
    try:
        # Load the analysis payload.
        with open(json_file_path, 'r', encoding='utf-8') as fp:
            payload = json.load(fp)

        summaries = payload.get('overall_activity_summaries', [])
        observations = payload.get('frame_observations', [])

        # Group frame observations by their batch index.
        grouped = {}
        for obs in observations:
            grouped.setdefault(obs.get('batch_index'), []).append(obs)

        # Render one "片段" section per batch summary, accumulating parts
        # in a list and joining once at the end.
        parts = []
        for index, summary in enumerate(summaries, 1):
            batch_key = summary.get('batch_index')
            parts.append(f"## 片段 {index}\n")
            parts.append(f"- 时间范围:{summary.get('time_range', '')}\n")
            batch_summary = summary.get('summary', '')
            if batch_summary:
                parts.append(f"- 片段描述:{batch_summary}\n")
            else:
                parts.append(f"- 片段描述:\n")
            parts.append("- 详细描述:\n")
            # Emit one bullet per frame observation in this batch,
            # keeping the observation text verbatim (no splitting).
            for obs in grouped.get(batch_key, []):
                stamp = obs.get('timestamp', '')
                detail = obs.get('observation', '')
                if detail:
                    parts.append(f"  - {stamp}: {detail}\n")
                else:
                    parts.append(f"  - {stamp}: \n")
            parts.append("\n")
        return "".join(parts)
    except Exception:
        # Mirror the original contract: return the traceback as a string.
        return f"处理JSON文件时出错: {traceback.format_exc()}"
def generate_narration(markdown_content, api_key, base_url, model):
    """
    Call an OpenAI-compatible chat API to generate narration copy from the
    Markdown frame-analysis content.

    Fixes in the prompt template: the third example block now closes with
    </example_text_3> (it previously repeated the opening tag, leaving the
    tag unbalanced), and the <output> JSON example is now valid (closing
    bracket for "items" added, trailing comma removed).

    :param markdown_content: Markdown-formatted video frame analysis
    :param api_key: API key for the OpenAI-compatible endpoint
    :param base_url: base URL of the API (for non-official endpoints)
    :param model: model name to use
    :return: generated narration script (JSON string), or an error-message string
    """
    try:
        # Build the prompt. The examples and restrictions are runtime content
        # sent to the model and must stay in the original language.
        prompt = """
我是一名荒野建造解说的博主以下是一些同行的对标文案请你深度学习并总结这些文案的风格特点跟内容特点
<example_text_1>
解压助眠的天花板就是荒野建造沉浸丝滑的搭建过程可以说每一帧都是极致享受我保证强迫症来了都找不出一丁点毛病更别说全屋严丝合缝的拼接工艺还能轻松抵御零下二十度气温让你居住的每一天都温暖如春
在家闲不住的西姆今天也打算来一次野外建造行走没多久他就发现许多倒塌的树任由它们自生自灭不如将其利用起来想到这他就开始挥舞铲子要把地基挖掘出来虽然每次只能挖一点点但架不住他体能惊人没多长时间一个 2x3 的深坑就赫然出现这深度住他一人绰绰有余
随后他去附近收集来原木这些都是搭建墙壁的最好材料而在投入使用前自然要把表皮刮掉防止森林中的白蚁蛀虫处理好一大堆后西姆还在两端打孔使用木钉固定在一起这可不是用来做墙壁的而是做庇护所的承重柱只要木头间的缝隙足够紧密那搭建出的木屋就能足够坚固
每向上搭建一层他都会在中间塞入苔藓防寒保证不会泄露一丝热量其他几面也是用相同方法很快西姆就做好了三面墙壁每一根木头都极其工整保证强迫症来了都要点个赞再走
在继续搭建墙壁前西姆决定将壁炉制作出来毕竟森林夜晚的气温会很低保暖措施可是重中之重完成后他找来一块大树皮用来充当庇护所的大门而上面刮掉的木屑还能作为壁炉的引火物可以说再完美不过
测试了排烟没问题后他才开始搭建最后一面墙壁这一面要预留门和窗所以在搭建到一半后还需要在原木中间开出卡口让自己劈砍时能轻松许多此时只需将另外一根如法炮制两端拼接在一起后就是一扇大小适中的窗户而随着随后一层苔藓铺好最后一根原木落位这个庇护所的雏形就算完成
大门的安装他没选择用合页而是在底端雕刻出榫头门框上则雕刻出榫眼只能说西姆的眼就是一把尺这完全就是严丝合缝此时他才开始搭建屋顶这里西姆用的方法不同他先把最外围的原木固定好随后将原木平铺在上面就能得到完美的斜面屋顶等他将四周的围栏也装好后工整的屋顶看起来十分舒服西姆躺上去都不想动
稍作休息后他利用剩余的苔藓对屋顶的缝隙处密封可这样西姆觉得不够保险于是他找来一些黏土再次对原本的缝隙二次加工保管这庇护所冬天也暖和最后只需要平铺上枯叶以及挖掘出的泥土整个屋顶就算完成
考虑到庇护所的美观性自然少不了覆盖上苔藓翠绿的颜色看起来十分舒服就连门口的庭院旁他都移植了许多小树做点缀让这木屋与周边环境融为一体西姆才刚完成好这件事一场大雨就骤然降临好在此时的他已经不用淋雨更别说这屋顶防水十分不错室内没一点雨水渗透进来
等待温度回升的过程西姆利用墙壁本身的凹槽把床框镶嵌在上面只需要铺上苔藓以及自带的床单枕头一张完美的单人床就做好辛苦劳作一整天西姆可不会亏待自己他将自带的牛肉腌制好后直接放到壁炉中烤只需要等待三十分钟就能享受这美味的一顿
在辛苦建造一星期后他终于可以在自己搭建的庇护所中享受最纯正的野外露营后面西姆回家补给了一堆物资再次回来时森林已经大雪纷飞让他原本翠绿的小屋更换上了冬季限定皮肤好在内部设施没受什么影响和他离开时一样整洁
就是房间中已经没多少柴火让西姆今天又得劈柴寒冷干燥的天气让木头劈起来十分轻松没多久他就收集到一大堆这些足够燃烧好几天虽然此时外面大雪纷飞但小屋中却开始逐渐温暖这次他除了带来一些食物外还有几瓶调味料以及一整套被褥让自己的居住舒适度提高一大截
而秋天他有收集干草的缘故只需要塞入枕套中密封起来就能作为靠垫用就这居住条件比一般人在家过的还要奢侈趁着壁炉木头变木炭的过程西姆则开始不紧不慢的处理食物他取出一块牛排改好花刀以后撒上一堆调料腌制起来接着用锡纸包裹好放到壁炉中直接炭烤搭配上自带的红酒是一个非常好的选择
随着时间来到第二天外面的积雪融化了不少西姆简单做顿煎蛋补充体力后决定制作一个室外篝火堆用来晚上驱散周边野兽搭建这玩意没什么技巧只需要找到一大堆木棍利用大树的夹缝将其掰弯然后将其堆积在一起就是一个简易版的篝火堆看这外形有点像帐篷好在西姆没想那么多
等待天色暗淡下来后他才来到室外将其点燃顺便处理下多余的废料只可惜这场景没朋友陪在身边对西姆来说可能是个遗憾而哪怕森林只有他一个人都依旧做了好几个小时等到里面的篝火彻底燃尽后西姆还找来雪球覆盖到上面将火熄灭这防火意识可谓十分好最后在室内二十五度的高温下裹着被子睡觉
</example_text_1>
<example_text_2>
解压助眠的天花板就是荒野建造沉浸丝滑的搭建过程每一帧都是极致享受全屋严丝合缝的拼接工艺能轻松抵御零下二十度气温居住体验温暖如春
在家闲不住的西姆开启野外建造他发现倒塌的树决定加以利用先挖掘出 2x3 的深坑作为地基接着收集原木刮掉表皮防白蚁蛀虫打孔用木钉固定制作承重柱搭建墙壁时每一层都塞入苔藓防寒很快做好三面墙
为应对森林夜晚低温西姆制作壁炉用大树皮当大门刮下的木屑做引火物搭建最后一面墙时预留门窗通过在原木中间开口拼接做出窗户大门采用榫卯结构安装严丝合缝
搭建屋顶时先固定外围原木再平铺原木形成斜面屋顶之后用苔藓黏土密封缝隙铺上枯叶和泥土为美观在木屋覆盖苔藓移植小树点缀完工时遇大雨木屋防水良好
西姆利用墙壁凹槽镶嵌床框铺上苔藓床单枕头做成床劳作一天后他用壁炉烤牛肉享用建造一星期后他开始野外露营
后来西姆回家补给物资回来时森林大雪纷飞他劈柴储备带回食物调味料和被褥提高居住舒适度还用干草做靠垫他用壁炉烤牛排搭配红酒
第二天积雪融化西姆制作室外篝火堆防野兽用大树夹缝掰弯木棍堆积而成晚上点燃处理废料结束后用雪球灭火最后在室内二十五度的环境中裹被入睡
</example_text_2>
<example_text_3>
如果战争到来这个深埋地下十几米的庇护所绝对是 bug 般的存在即使被敌人发现还能通过快速通道一秒逃出里面不仅有竹子地暖地下水井还自制抽水机在解决用水问题的同时甚至自研无土栽培技术过上完全自给自足的生活
阿伟的老婆美如花但阿伟从来不回家来到野外他乐哈哈一言不合就开挖众所周知当战争来临时地下堡垒的安全性是最高的阿伟苦苦研习两载半只为练就一身挖洞本领在这双逆天麒麟臂的加持下如此坚硬的泥土都只能当做炮灰
得到了充足的空间后他便开始对这些边缘进行打磨随后阿伟将细线捆在木棍上以此描绘出圆柱的轮廓接着再一点点铲掉多余的部分虽然是由泥土一体式打造但这样的桌子保准用上千年都不成问题
考虑到十几米的深度进出非常不方便于是阿伟找来两根长达 66.6 米的木头打算为庇护所打造一条快速通道只见他将木桩牢牢地插入地下并顺着洞口的方向延伸出去直到贯穿整个山洞接着在每个木桩的连接处钉入铁钉确保轨道不能有一毫米的偏差完成后再制作一个木质框架从而达到前后滑动的效果
不得不说阿伟这手艺简直就是大钢管子杵青蛙在上面放上一个木制的车斗还能加快搬运泥土的速度没多久庇护所的内部就已经初见雏形为了住起来更加舒适还需要为自己打造一张床虽然深处的泥土同样很坚固但好处就是不用担心垮塌的风险
阿伟不仅设计了更加符合人体工学的拱形并且还在一旁雕刻处壁龛就是这氛围怎么看着有点不太吉利别看阿伟一身腱子肉但这身体里的艺术细菌可不少每个边缘的地方他都做了精雕细琢瞬间让整个卧室的颜值提升一大截
住在地下的好处就是房子面积全靠挖每平方消耗两个半馒头不仅没有了房贷的压力就连买墓地的钱也省了阿伟将中间的墙壁挖空从而得到取暖的壁炉当然最重要的还有排烟问题要想从上往下打通十几米的山体是件极其困难的事好在阿伟年轻时报过忆坤年的古墓派补习班这打洞技术堪比隔壁学校的土拨鼠专业虽然深度长达十几米但排烟效果却一点不受影响一个字专业
随后阿伟继续对壁炉底部雕刻打通了底部放柴火的空间并制作出放锅的灶头完成后阿伟从侧面将壁炉打通并制作出一条导热的通道以此连接到床铺的位置毕竟住在这么一个风湿宝地不注意保暖除湿很容易得老寒腿
阿伟在床面上挖出一条条管道以便于温度能传输到床的每个角落接下来就可以根据这些通道的长度裁切出同样长短的竹子根据竹筒的大小凿出相互连接的孔洞最后再将竹筒内部打通以达到温度传送的效果
而后阿伟将这些管道安装到凹槽内在他严谨的制作工艺下每根竹子刚好都能镶嵌进去在铺设床面之前还需要用木塞把圆孔堵住防止泥土掉落进管道泥土虽然不能隔绝湿气但却是十分优良的导热材料等他把床面都压平后就可以小心的将这些木塞拔出来最后再用黏土把剩余的管道也遮盖起来直到整个墙面恢复原样
接下来还需要测试一下加热效果当他把火点起来后温度很快就传送到了管道内把火力一点点加大直到热气流淌到更远的床面随着小孔里的青烟冒出也预示着阿伟的地暖可以投入使用而后阿伟制作了一些竹条并用细绳将它们喜结连理
千里之行始于足下美好的家园要靠自己双手打造明明可以靠才艺吃饭的阿伟偏偏要用八块腹肌征服大家就问这样的男人哪个野生婆娘不喜欢完成后阿伟还用自己 35 码的大腚感受了一下真烫
随后阿伟来到野区找到一根上好的雷击木他当即就把木头咔嚓成两段并取下两节较为完整的带了回去刚好能和圆桌配套另外一个在里面凿出凹槽并插入木棍连接得到一个夯土的木锤住过农村的小伙伴都知道这样夯出来的地面堪比水泥地不仅坚硬耐磨还不用担心脚底打滑忙碌了一天的阿伟已经饥渴难耐拿出野生小烤肠安安心心住新房光脚爬上大热炕一觉能睡到天亮
第二天阿伟打算将房间扩宽毕竟吃住的地方有了还要解决个人卫生的问题阿伟在另一侧增加了一个房间他打算将这里打造成洗澡的地方为了防止泥土垮塌他将顶部做成圆弧形等挖出足够的空间后旁边的泥土已经堆成了小山
为了方便清理这些泥土阿伟在之前的轨道增加了转弯交接处依然是用铁钉固定一直延伸到房间的最里面有了运输车的帮助这些成吨的泥土也能轻松的运送出去并且还能体验过山车的感觉很快他就完成了清理工作
为了更方便的在里面洗澡他将底部一点点挖空这么大的浴缸看来阿伟并不打算一个人住完成后他将墙面雕刻的凹凸有致让这里看起来更加豪华接着用洛阳铲挖出排水口并用一根相同大小的竹筒作为开关
由于四周都是泥土还不能防水阿伟特意找了一些白蚁巢用来制作可以防水的野生水泥现在就可以将里里外外能接触到水的地方都涂抹一遍细心的阿伟还找来这种 500 克一斤的鹅卵石对池子表面进行装饰
没错水源问题阿伟早已经考虑在内他打算直接在旁边挖个水井毕竟已经挖了这么深再向下挖一挖应该就能到达地下水的深度经过几日的奋战能看得出阿伟已经消瘦了不少但一想到马上就能拥有的豪宅他直接化身为无情的挖土机器很快就挖到了好几米的深度
考虑到自己的弹跳力有限阿伟在一旁定入木桩然后通过绳子爬上爬下随着深度越来越深井底已经开始渗出水来这也预示着打井成功没多久这里面将渗满泉水仅凭一次就能挖到水源看来这里还真是块风湿宝地
随后阿伟在井口四周挖出凹槽以便于井盖的安置这一量才知道井的深度已经达到了足足的 5 阿伟把木板组合在一起再沿着标记切掉多余部分他甚至还给井盖做了把手可是如何从这么深的井里打水还是个问题但从阿伟坚定的眼神来看他应该想到了解决办法
只见他将树桩锯成两半然后用凿子把里面一点点掏空另外一半也是如法炮制接着还要在底部挖出圆孔要想成功将水从 5 米深的地方抽上来那就不得不提到大家熟知的勾股定理没错这跟勾股定理没什么关系
阿伟给竹筒做了一个木塞并在里面打上安装连接轴的孔为了增加密闭性阿伟不得不牺牲了自己的 AJ剪出与木塞相同的大小后再用木钉固定住随后他收集了一些树胶并放到火上加热融化接下来就可以涂在木塞上增加使用寿命
现在将竹筒组装完成就可以利用虹吸原理将水抽上来完成后就可以把井盖盖上去再用泥土在上面覆盖现在就不用担心失足掉下去了
接下来阿伟去采集了一些大漆将它涂抹在木桶接缝处就能将其二合为一完了再接入旁边浴缸的入水口每个连接的地方都要做好密封不然后面很容易漏水随后就可以安装上活塞并用一根木桩作为省力杠杆根据空气压强的原理将井水抽上来
经过半小时的来回拉扯硕大的浴缸终于被灌满阿伟也是忍不住洗了把脸接下来还需要解决排水的问题阿伟在地上挖出沟渠一直贯穿到屋外然后再用竹筒从出水口连接每个接口处都要抹上胶水就连门外的出水口他都做了隐藏
在野外最重要的就是庇护所水源还有食物既然已经完成了前二者那么阿伟还需要拥有可持续发展的食物来源他先是在地上挖了两排地洞然后在每根竹筒的表面都打上无数孔洞这就是他打算用来种植的载体在此之前还需要用大火对竹筒进行杀菌消毒
趁着这时候他去搬了一麻袋的木屑先用芭蕉叶覆盖在上面再铺上厚厚的黏土隔绝温度在火焰的温度下能让里面的木屑达到生长条件
等到第二天所有材料都晾凉后阿伟才将竹筒内部掏空并将木屑一点点地塞入竹筒一切准备就绪就可以将竹筒插入提前挖好的地洞最后再往竹筒里塞入种子依靠房间内的湿度和温度就能达到大棚种植的效果稍加时日这些种子就会慢慢发芽
虽然暂时还吃不上自己培养的食物但好在阿伟从表哥贺强那里学到不少钓鱼本领哪怕只有一根小小的竹竿也能让他钓上两斤半的大鲶鱼新鲜的食材那肯定是少不了高温消毒的过程趁着鱼没熟阿伟直接爬进浴缸冰凉的井水瞬间洗去了身上的疲惫这一刻的阿伟是无比的享受
不久后鱼也烤得差不多了阿伟的生活现在可以说是有滋有味住在十几米的地下不仅能安全感满满哪怕遇到危险还能通过轨道快速逃生
</example_text_3>
<video_frame_description>
%s
</video_frame_description>
我正在尝试做这个内容的解说纪录片视频我需要你以 <video_frame_description> </video_frame_description> 中的内容为解说目标根据我刚才提供给你的对标文案 <example_text> 特点以及你总结的特点帮我生成一段关于荒野建造的解说文案文案需要符合平台受欢迎的解说风格请使用 json 格式进行输出使用 <output> 中的输出格式
<output>
{
"items": [
{
"_id": 1, # 唯一递增id
"timestamp": "00:00:05,390-00:00:10,430",
"picture": "画面描述",
"narration": "解说文案"
}
]
}
</output>
<restriction>
1. 只输出 json 内容不要输出其他任何说明性的文字
2. 解说文案的语言使用 简体中文
3. 严禁虚构画面所有画面只能从 <video_frame_description> 中摘取
</restriction>
""" % (markdown_content)

        # Initialize the client via the OpenAI SDK (works with any
        # OpenAI-compatible endpoint through base_url).
        client = OpenAI(
            api_key=api_key,
            base_url=base_url
        )

        if model not in ["deepseek-reasoner"]:
            # Models that support JSON mode: request a json_object response.
            response = client.chat.completions.create(
                model=model,
                messages=[
                    {"role": "system", "content": "你是一名专业的短视频解说文案撰写专家。"},
                    {"role": "user", "content": prompt}
                ],
                temperature=1.5,
                response_format={"type": "json_object"},
            )
            # Extract the generated narration from the first choice.
            if response.choices and len(response.choices) > 0:
                narration_script = response.choices[0].message.content
                # Log total token usage for cost tracking.
                logger.debug(f"消耗的tokens: {response.usage.total_tokens}")
                return narration_script
            else:
                return "生成解说文案失败: 未获取到有效响应"
        else:
            # deepseek-reasoner does not support JSON mode, so the reply may be
            # wrapped in ```json fences that must be stripped afterwards.
            response = client.chat.completions.create(
                model=model,
                messages=[
                    {"role": "system", "content": "你是一名专业的短视频解说文案撰写专家。"},
                    {"role": "user", "content": prompt}
                ],
                temperature=1.5,
            )
            if response.choices and len(response.choices) > 0:
                narration_script = response.choices[0].message.content
                logger.debug(f"文案消耗的tokens: {response.usage.total_tokens}")
                # Strip markdown code fences around the JSON payload.
                narration_script = narration_script.replace("```json", "").replace("```", "")
                return narration_script
            else:
                return "生成解说文案失败: 未获取到有效响应"
    except Exception as e:
        # Mirror the module's error-as-string convention.
        return f"调用API生成解说文案时出错: {traceback.format_exc()}"
if __name__ == '__main__':
    # Manual test entry point: parse a frame-analysis JSON and dump it as
    # Markdown for inspection. NOTE(review): all paths and the API key below
    # are developer-local placeholders.
    text_provider = 'openai'
    text_api_key = "sk-xxx"
    text_model = "deepseek-reasoner"
    text_base_url = "https://api.deepseek.com"
    video_frame_description_path = "/Users/apple/Desktop/home/NarratoAI/storage/temp/analysis/frame_analysis_20250508_1139.json"
    # Exercise parsing against the newer analysis JSON file.
    test_file_path = "/Users/apple/Desktop/home/NarratoAI/storage/temp/analysis/frame_analysis_20250508_2258.json"
    markdown_output = parse_frame_analysis_to_markdown(test_file_path)
    # print(markdown_output)
    # Write the Markdown to disk so the formatting can be reviewed by hand.
    output_file = "/Users/apple/Desktop/home/NarratoAI/storage/temp/家里家外1-5.md"
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(markdown_output)
    # print(f"\n已将Markdown输出保存到: {output_file}")
    # # 生成解说文案
    # narration = generate_narration(
    #     markdown_output,
    #     text_api_key,
    #     base_url=text_base_url,
    #     model=text_model
    # )
    #
    # # 保存解说文案
    # print(narration)
    # print(type(narration))
    # narration_file = "/Users/apple/Desktop/home/NarratoAI/storage/temp/final_narration_script.json"
    # with open(narration_file, 'w', encoding='utf-8') as f:
    #     f.write(narration)
    # print(f"\n已将解说文案保存到: {narration_file}")

View File

@ -0,0 +1,426 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
'''
@Project: NarratoAI
@File : generate_video
@Author : 小林同学
@Date : 2025/5/7 上午11:55
'''
import os
import traceback
from typing import Optional, Dict, Any
from loguru import logger
from moviepy import (
VideoFileClip,
AudioFileClip,
CompositeAudioClip,
CompositeVideoClip,
TextClip,
afx
)
from moviepy.video.tools.subtitles import SubtitlesClip
from PIL import ImageFont
from app.utils import utils
from app.models.schema import AudioVolumeDefaults
def merge_materials(
    video_path: str,
    audio_path: str,
    output_path: str,
    subtitle_path: Optional[str] = None,
    bgm_path: Optional[str] = None,
    options: Optional[Dict[str, Any]] = None
) -> str:
    """
    Merge video, voice-over audio, BGM and subtitles into the final video.

    Args:
        video_path: path to the source video file
        audio_path: path to the voice-over audio file
        output_path: path of the rendered output file
        subtitle_path: optional path to a subtitle (.srt) file
        bgm_path: optional path to a background-music file
        options: optional dict of extra settings; recognized keys:
            - voice_volume: voice-over volume (default AudioVolumeDefaults.VOICE_VOLUME)
            - bgm_volume: background-music volume (default AudioVolumeDefaults.BGM_VOLUME)
            - original_audio_volume: original-track volume (default AudioVolumeDefaults.ORIGINAL_VOLUME)
            - keep_original_audio: keep the source video's audio (default True)
            - subtitle_font: subtitle font file name, resolved under utils.font_dir()
            - subtitle_font_size: subtitle font size (default 40)
            - subtitle_color: subtitle color (default '#FFFFFF')
            - subtitle_bg_color: subtitle background color (default 'transparent')
            - subtitle_position: 'bottom' | 'top' | 'center' | 'custom' (default 'bottom')
            - custom_position: vertical position in percent for 'custom' (default 70)
            - stroke_color: outline color (default '#000000')
            - stroke_width: outline width (default 1)
            - threads: encoding threads (default 2)
            - fps: output frame rate (default 30)
            - subtitle_enabled: enable subtitle rendering (default True)

    Returns:
        Path of the output video.
    """
    # Normalize options so the .get() lookups below are safe.
    if options is None:
        options = {}
    # Resolve settings, falling back to the unified volume defaults.
    voice_volume = options.get('voice_volume', AudioVolumeDefaults.VOICE_VOLUME)
    bgm_volume = options.get('bgm_volume', AudioVolumeDefaults.BGM_VOLUME)
    # Bug fix noted upstream: the original-audio default was raised from 0.0 so
    # the source track stays audible in drama-commentary mode.
    original_audio_volume = options.get('original_audio_volume', AudioVolumeDefaults.ORIGINAL_VOLUME)
    keep_original_audio = options.get('keep_original_audio', True)  # keep source audio by default
    subtitle_font = options.get('subtitle_font', '')
    subtitle_font_size = options.get('subtitle_font_size', 40)
    subtitle_color = options.get('subtitle_color', '#FFFFFF')
    subtitle_bg_color = options.get('subtitle_bg_color', 'transparent')
    subtitle_position = options.get('subtitle_position', 'bottom')
    custom_position = options.get('custom_position', 70)
    stroke_color = options.get('stroke_color', '#000000')
    stroke_width = options.get('stroke_width', 1)
    threads = options.get('threads', 2)
    fps = options.get('fps', 30)
    subtitle_enabled = options.get('subtitle_enabled', True)
    # Log the effective configuration up front to ease debugging.
    logger.info(f"音量配置详情:")
    logger.info(f"  - 配音音量: {voice_volume}")
    logger.info(f"  - 背景音乐音量: {bgm_volume}")
    logger.info(f"  - 原声音量: {original_audio_volume}")
    logger.info(f"  - 是否保留原声: {keep_original_audio}")
    logger.info(f"字幕配置详情:")
    logger.info(f"  - 是否启用字幕: {subtitle_enabled}")
    logger.info(f"  - 字幕文件路径: {subtitle_path}")

    # Clamp every volume into the allowed [MIN_VOLUME, MAX_VOLUME] range,
    # warning when a caller-supplied value is out of bounds.
    def validate_volume(volume, name):
        if not (AudioVolumeDefaults.MIN_VOLUME <= volume <= AudioVolumeDefaults.MAX_VOLUME):
            logger.warning(f"{name}音量 {volume} 超出有效范围 [{AudioVolumeDefaults.MIN_VOLUME}, {AudioVolumeDefaults.MAX_VOLUME}],将被限制")
            return max(AudioVolumeDefaults.MIN_VOLUME, min(volume, AudioVolumeDefaults.MAX_VOLUME))
        return volume
    voice_volume = validate_volume(voice_volume, "配音")
    bgm_volume = validate_volume(bgm_volume, "背景音乐")
    original_audio_volume = validate_volume(original_audio_volume, "原声")
    # MoviePy 2.1.1 does not accept the literal 'transparent'; None means
    # transparent background in the newer API.
    if subtitle_bg_color == 'transparent':
        subtitle_bg_color = None
    # Ensure the output directory exists.
    output_dir = os.path.dirname(output_path)
    os.makedirs(output_dir, exist_ok=True)
    logger.info(f"开始合并素材...")
    logger.info(f"  ① 视频: {video_path}")
    logger.info(f"  ② 音频: {audio_path}")
    if subtitle_path:
        logger.info(f"  ③ 字幕: {subtitle_path}")
    if bgm_path:
        logger.info(f"  ④ 背景音乐: {bgm_path}")
    logger.info(f"  ⑤ 输出: {output_path}")
    # Load the source video and, when requested, capture its original audio
    # track (volume-adjusted) before stripping the audio from the clip.
    try:
        video_clip = VideoFileClip(video_path)
        logger.info(f"视频尺寸: {video_clip.size[0]}x{video_clip.size[1]}, 时长: {video_clip.duration}")
        original_audio = None
        if keep_original_audio and original_audio_volume > 0:
            try:
                original_audio = video_clip.audio
                if original_audio:
                    original_audio = original_audio.with_effects([afx.MultiplyVolume(original_audio_volume)])
                    logger.info(f"已提取视频原声,音量设置为: {original_audio_volume}")
                else:
                    logger.warning("视频没有音轨,无法提取原声")
            except Exception as e:
                logger.error(f"提取视频原声失败: {str(e)}")
                original_audio = None
        # Drop the original track here; the final mix is attached below.
        video_clip = video_clip.without_audio()
    except Exception as e:
        logger.error(f"加载视频失败: {str(e)}")
        raise
    # Collect every audio layer (voice-over, original track, BGM) to composite.
    audio_tracks = []
    # Voice-over first.
    if audio_path and os.path.exists(audio_path):
        try:
            voice_audio = AudioFileClip(audio_path).with_effects([afx.MultiplyVolume(voice_volume)])
            audio_tracks.append(voice_audio)
            logger.info(f"已添加配音音频,音量: {voice_volume}")
        except Exception as e:
            logger.error(f"加载配音音频失败: {str(e)}")
    # Then the (already volume-adjusted) original track, if captured above.
    if original_audio is not None:
        audio_tracks.append(original_audio)
        logger.info(f"已添加视频原声,音量: {original_audio_volume}")
    # Finally the BGM: volume-adjusted, faded out, and looped to video length.
    if bgm_path and os.path.exists(bgm_path):
        try:
            bgm_clip = AudioFileClip(bgm_path).with_effects([
                afx.MultiplyVolume(bgm_volume),
                afx.AudioFadeOut(3),
                afx.AudioLoop(duration=video_clip.duration),
            ])
            audio_tracks.append(bgm_clip)
            logger.info(f"已添加背景音乐,音量: {bgm_volume}")
        except Exception as e:
            logger.error(f"添加背景音乐失败: \n{traceback.format_exc()}")
    # Attach the composite mix; without any track the output stays silent.
    if audio_tracks:
        final_audio = CompositeAudioClip(audio_tracks)
        video_clip = video_clip.with_audio(final_audio)
        logger.info(f"已合成所有音频轨道,共{len(audio_tracks)}个")
    else:
        logger.warning("没有可用的音频轨道,输出视频将没有声音")
    # Resolve the subtitle font file under the project font directory.
    font_path = None
    if subtitle_path and subtitle_font:
        font_path = os.path.join(utils.font_dir(), subtitle_font)
        # Forward slashes avoid escaping issues on Windows.
        if os.name == "nt":
            font_path = font_path.replace("\\", "/")
        logger.info(f"使用字体: {font_path}")
    # Video dimensions used for subtitle wrapping and placement.
    video_width, video_height = video_clip.size

    def create_text_clip(subtitle_item):
        """Create one positioned, timed TextClip for a single subtitle entry.

        subtitle_item is ((start_seconds, end_seconds), text).
        """
        phrase = subtitle_item[1]
        max_width = video_width * 0.9
        # Pre-wrap the text to the video width when a font file is available.
        wrapped_txt = phrase
        txt_height = 0
        if font_path:
            wrapped_txt, txt_height = wrap_text(
                phrase,
                max_width=max_width,
                font=font_path,
                fontsize=subtitle_font_size
            )
        # Build the text clip; fall back to a minimal parameter set if the
        # full set (background/stroke) is rejected by this MoviePy build.
        try:
            _clip = TextClip(
                text=wrapped_txt,
                font=font_path,
                font_size=subtitle_font_size,
                color=subtitle_color,
                bg_color=subtitle_bg_color,  # normalized earlier: None means transparent
                stroke_color=stroke_color,
                stroke_width=stroke_width,
            )
        except Exception as e:
            logger.error(f"创建字幕片段失败: {str(e)}, 使用简化参数重试")
            _clip = TextClip(
                text=wrapped_txt,
                font=font_path,
                font_size=subtitle_font_size,
                color=subtitle_color,
            )
        # Apply start/end/duration from the subtitle timing tuple.
        duration = subtitle_item[0][1] - subtitle_item[0][0]
        _clip = _clip.with_start(subtitle_item[0][0])
        _clip = _clip.with_end(subtitle_item[0][1])
        _clip = _clip.with_duration(duration)
        # Position the clip according to the configured placement.
        if subtitle_position == "bottom":
            _clip = _clip.with_position(("center", video_height * 0.95 - _clip.h))
        elif subtitle_position == "top":
            _clip = _clip.with_position(("center", video_height * 0.05))
        elif subtitle_position == "custom":
            # Interpret custom_position as a percentage of the usable height,
            # clamped so the clip stays fully on screen with a 10px margin.
            margin = 10
            max_y = video_height - _clip.h - margin
            min_y = margin
            custom_y = (video_height - _clip.h) * (custom_position / 100)
            custom_y = max(
                min_y, min(custom_y, max_y)
            )
            _clip = _clip.with_position(("center", custom_y))
        else:  # center
            _clip = _clip.with_position(("center", "center"))
        return _clip

    # Factory used by SubtitlesClip to instantiate raw text clips.
    def make_textclip(text):
        return TextClip(
            text=text,
            font=font_path,
            font_size=subtitle_font_size,
            color=subtitle_color,
        )
    # Subtitle rendering — honors the subtitle_enabled switch (bug fix).
    if subtitle_enabled and subtitle_path and os.path.exists(subtitle_path):
        logger.info("字幕已启用,开始处理字幕文件")
        try:
            # Parse the subtitle file.
            sub = SubtitlesClip(
                subtitles=subtitle_path,
                encoding="utf-8",
                make_textclip=make_textclip
            )
            # Build one styled/positioned clip per subtitle entry.
            text_clips = []
            for item in sub.subtitles:
                clip = create_text_clip(subtitle_item=item)
                text_clips.append(clip)
            # Overlay all subtitle clips on the video.
            video_clip = CompositeVideoClip([video_clip, *text_clips])
            logger.info(f"已添加{len(text_clips)}个字幕片段")
        except Exception as e:
            logger.error(f"处理字幕失败: \n{traceback.format_exc()}")
    elif not subtitle_enabled:
        logger.info("字幕已禁用,跳过字幕处理")
    elif not subtitle_path:
        logger.info("未提供字幕文件路径,跳过字幕处理")
    elif not os.path.exists(subtitle_path):
        logger.warning(f"字幕文件不存在: {subtitle_path},跳过字幕处理")
    # Render the final video; resources are released in the finally block
    # whether or not the export succeeds.
    try:
        video_clip.write_videofile(
            output_path,
            audio_codec="aac",
            temp_audiofile_path=output_dir,
            threads=threads,
            fps=fps,
        )
        logger.success(f"素材合并完成: {output_path}")
    except Exception as e:
        logger.error(f"导出视频失败: {str(e)}")
        raise
    finally:
        # Release moviepy resources.
        video_clip.close()
        del video_clip
    return output_path
def wrap_text(text, max_width, font="Arial", fontsize=60):
    """
    Wrap text so each rendered line fits within a pixel width.

    Tries word-boundary wrapping first; if a single word is wider than
    max_width (e.g. CJK text without spaces), falls back to per-character
    wrapping.

    Args:
        text: the text to wrap
        max_width: maximum line width in pixels
        font: font file path (falls back to PIL's default font if unloadable)
        fontsize: font size in points

    Returns:
        Tuple of (wrapped text with '\\n' line breaks, total pixel height).
    """
    try:
        font_obj = ImageFont.truetype(font, fontsize)
    except Exception:
        # Fix: narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # propagate; fall back to PIL's built-in font when loading fails.
        font_obj = ImageFont.load_default()

    def get_text_size(inner_text):
        # Measure the rendered bounding box of the stripped text.
        inner_text = inner_text.strip()
        left, top, right, bottom = font_obj.getbbox(inner_text)
        return right - left, bottom - top

    width, height = get_text_size(text)
    if width <= max_width:
        # Already fits on one line — return unchanged.
        return text, height

    # First pass: wrap on word boundaries (space-separated).
    processed = True
    _wrapped_lines_ = []
    words = text.split(" ")
    _txt_ = ""
    for word in words:
        _before = _txt_
        _txt_ += f"{word} "
        _width, _height = get_text_size(_txt_)
        if _width <= max_width:
            continue
        else:
            if _txt_.strip() == word.strip():
                # A single word alone exceeds max_width: word wrapping cannot
                # work, switch to character-level wrapping below.
                processed = False
                break
            _wrapped_lines_.append(_before)
            _txt_ = f"{word} "
    _wrapped_lines_.append(_txt_)
    if processed:
        _wrapped_lines_ = [line.strip() for line in _wrapped_lines_]
        result = "\n".join(_wrapped_lines_).strip()
        # Total height = line count x single-line height.
        height = len(_wrapped_lines_) * height
        return result, height

    # Second pass: character-granularity wrapping (handles CJK text).
    _wrapped_lines_ = []
    chars = list(text)
    _txt_ = ""
    for word in chars:
        _txt_ += word
        _width, _height = get_text_size(_txt_)
        if _width <= max_width:
            continue
        else:
            _wrapped_lines_.append(_txt_)
            _txt_ = ""
    _wrapped_lines_.append(_txt_)
    result = "\n".join(_wrapped_lines_).strip()
    height = len(_wrapped_lines_) * height
    return result, height
if __name__ == '__main__':
    # Manual test entry point: merge a pre-cut video with narration audio,
    # subtitles and BGM. NOTE(review): all paths are developer-local samples.
    merger_mp4 = '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/merger.mp4'
    merger_sub = '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/merged_subtitle_00_00_00-00_01_30.srt'
    merger_audio = '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/merger_audio.mp3'
    bgm_path = '/Users/apple/Desktop/home/NarratoAI/resource/songs/bgm.mp3'
    output_video = '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/combined_test.mp4'
    # Example invocation options.
    options = {
        'voice_volume': 1.0,  # voice-over volume
        'bgm_volume': 0.1,  # background-music volume
        'original_audio_volume': 1.0,  # original-track volume; 0 disables it
        'keep_original_audio': True,  # keep the source video's audio
        'subtitle_enabled': True,  # subtitle toggle (exercises the subtitle-switch bug fix)
        'subtitle_font': 'MicrosoftYaHeiNormal.ttc',  # relative font name, resolved under font_dir()
        'subtitle_font_size': 40,
        'subtitle_color': '#FFFFFF',
        'subtitle_bg_color': None,  # None means transparent background
        'subtitle_position': 'bottom',
        'threads': 2
    }
    try:
        merge_materials(
            video_path=merger_mp4,
            audio_path=merger_audio,
            subtitle_path=merger_sub,
            bgm_path=bgm_path,
            output_path=output_video,
            options=options
        )
    except Exception as e:
        # Demo script: log the full traceback instead of crashing.
        logger.error(f"合并素材失败: \n{traceback.format_exc()}")

View File

@ -7,7 +7,7 @@ from typing import List
from loguru import logger
from openai import OpenAI
from openai import AzureOpenAI
from moviepy.editor import VideoFileClip
from moviepy import VideoFileClip
from openai.types.chat import ChatCompletion
import google.generativeai as gemini
from googleapiclient.errors import ResumableUploadError

View File

@ -4,15 +4,17 @@ import random
import traceback
from urllib.parse import urlencode
from datetime import datetime
import json
import requests
from typing import List
from typing import List, Optional
from loguru import logger
from moviepy.video.io.VideoFileClip import VideoFileClip
from app.config import config
from app.models.schema import VideoAspect, VideoConcatMode, MaterialInfo
from app.utils import utils
from app.utils import ffmpeg_utils
requested_count = 0
@ -256,10 +258,10 @@ def time_to_seconds(time_str: str) -> float:
"""
将时间字符串转换为秒数
支持格式: 'HH:MM:SS,mmm' (::,毫秒)
Args:
time_str: 时间字符串, "00:00:20,100"
Returns:
float: 转换后的秒数(包含毫秒)
"""
@ -281,7 +283,7 @@ def time_to_seconds(time_str: str) -> float:
raise ValueError("时间格式必须为 HH:MM:SS,mmm")
return seconds + ms
except ValueError as e:
logger.error(f"时间格式错误: {time_str}")
raise ValueError(f"时间格式错误: 必须为 HH:MM:SS,mmm 格式") from e
@ -290,10 +292,10 @@ def time_to_seconds(time_str: str) -> float:
def format_timestamp(seconds: float) -> str:
"""
将秒数转换为可读的时间格式 (HH:MM:SS,mmm)
Args:
seconds: 秒数(可包含毫秒)
Returns:
str: 格式化的时间字符串, "00:00:20,100"
"""
@ -302,14 +304,26 @@ def format_timestamp(seconds: float) -> str:
seconds_remain = seconds % 60
whole_seconds = int(seconds_remain)
milliseconds = int((seconds_remain - whole_seconds) * 1000)
return f"{hours:02d}:{minutes:02d}:{whole_seconds:02d},{milliseconds:03d}"
def save_clip_video(timestamp: str, origin_video: str, save_dir: str = "") -> dict:
def _detect_hardware_acceleration() -> Optional[str]:
    """Return the system's ffmpeg hardware-acceleration type, if any.

    Thin wrapper over the centralized detection in ffmpeg_utils.

    Returns:
        Optional[str]: the hwaccel identifier, or None when unsupported.
    """
    # Delegate directly to the shared, centralized detection helper.
    return ffmpeg_utils.get_ffmpeg_hwaccel_type()
def save_clip_video(timestamp: str, origin_video: str, save_dir: str = "") -> str:
"""
保存剪辑后的视频
Args:
timestamp: 需要裁剪的时间戳,格式为 'HH:MM:SS,mmm-HH:MM:SS,mmm'
例如: '00:00:00,000-00:00:20,100'
@ -328,85 +342,151 @@ def save_clip_video(timestamp: str, origin_video: str, save_dir: str = "") -> di
if not os.path.exists(save_dir):
os.makedirs(save_dir)
# 生成更规范的视频文件名
video_id = f"vid-{timestamp.replace(':', '-').replace(',', '_')}"
video_path = os.path.join(save_dir, f"{video_id}.mp4")
# 解析时间戳
start_str, end_str = timestamp.split('-')
# 格式化输出文件名(使用连字符替代冒号和逗号)
safe_start_time = start_str.replace(':', '-').replace(',', '-')
safe_end_time = end_str.replace(':', '-').replace(',', '-')
output_filename = f"vid_{safe_start_time}@{safe_end_time}.mp4"
video_path = os.path.join(save_dir, output_filename)
# 如果视频已存在,直接返回
if os.path.exists(video_path) and os.path.getsize(video_path) > 0:
logger.info(f"video already exists: {video_path}")
return {timestamp: video_path}
logger.info(f"视频已存在: {video_path}")
return video_path
try:
# 加载视频获取总时长
video = VideoFileClip(origin_video)
total_duration = video.duration
# 解析时间戳
start_str, end_str = timestamp.split('-')
# 检查视频是否存在
if not os.path.exists(origin_video):
logger.error(f"源视频文件不存在: {origin_video}")
return ''
# 获取视频总时长
try:
probe_cmd = ["ffprobe", "-v", "error", "-show_entries", "format=duration",
"-of", "default=noprint_wrappers=1:nokey=1", origin_video]
total_duration = float(subprocess.check_output(probe_cmd).decode('utf-8').strip())
except subprocess.CalledProcessError as e:
logger.error(f"获取视频时长失败: {str(e)}")
return ''
# 计算时间点
start = time_to_seconds(start_str)
end = time_to_seconds(end_str)
# 验证时间段
if start >= total_duration:
logger.warning(f"起始时间 {format_timestamp(start)} ({start:.3f}秒) 超出视频总时长 {format_timestamp(total_duration)} ({total_duration:.3f}秒)")
video.close()
return {}
return ''
if end > total_duration:
logger.warning(f"结束时间 {format_timestamp(end)} ({end:.3f}秒) 超出视频总时长 {format_timestamp(total_duration)} ({total_duration:.3f}秒),将自动调整为视频结尾")
end = total_duration
if end <= start:
logger.warning(f"结束时间 {format_timestamp(end)} 必须大于起始时间 {format_timestamp(start)}")
video.close()
return {}
# 剪辑视频
return ''
# 计算剪辑时长
duration = end - start
logger.info(f"开始剪辑视频: {format_timestamp(start)} - {format_timestamp(end)},时长 {format_timestamp(duration)}")
# 剪辑视频
subclip = video.subclip(start, end)
try:
# 检查视频是否有音频轨道并写入文件
subclip.write_videofile(
video_path,
codec='libx264',
audio_codec='aac',
temp_audiofile='temp-audio.m4a',
remove_temp=True,
audio=(subclip.audio is not None),
logger=None
# logger.info(f"开始剪辑视频: {format_timestamp(start)} - {format_timestamp(end)},时长 {format_timestamp(duration)}")
# 获取硬件加速选项
hwaccel = _detect_hardware_acceleration()
hwaccel_args = []
if hwaccel:
hwaccel_args = ffmpeg_utils.get_ffmpeg_hwaccel_args()
# 转换为FFmpeg兼容的时间格式逗号替换为点
ffmpeg_start_time = start_str.replace(',', '.')
ffmpeg_end_time = end_str.replace(',', '.')
# 构建FFmpeg命令 - 使用新的智能编码器选择
encoder = ffmpeg_utils.get_optimal_ffmpeg_encoder()
ffmpeg_cmd = [
"ffmpeg", "-y", *hwaccel_args,
"-i", origin_video,
"-ss", ffmpeg_start_time,
"-to", ffmpeg_end_time,
"-c:v", encoder,
"-c:a", "aac",
"-strict", "experimental",
video_path
]
# 根据编码器类型添加特定参数
if "nvenc" in encoder:
ffmpeg_cmd.insert(-1, "-preset")
ffmpeg_cmd.insert(-1, "medium")
elif "videotoolbox" in encoder:
ffmpeg_cmd.insert(-1, "-profile:v")
ffmpeg_cmd.insert(-1, "high")
elif "qsv" in encoder:
ffmpeg_cmd.insert(-1, "-preset")
ffmpeg_cmd.insert(-1, "medium")
elif encoder == "libx264":
ffmpeg_cmd.insert(-1, "-preset")
ffmpeg_cmd.insert(-1, "medium")
ffmpeg_cmd.insert(-1, "-crf")
ffmpeg_cmd.insert(-1, "23")
# 执行FFmpeg命令
# logger.info(f"裁剪视频片段: {timestamp} -> {ffmpeg_start_time}到{ffmpeg_end_time}")
# logger.debug(f"执行命令: {' '.join(ffmpeg_cmd)}")
# 在Windows系统上使用UTF-8编码处理输出避免GBK编码错误
is_windows = os.name == 'nt'
if is_windows:
process = subprocess.run(
ffmpeg_cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
encoding='utf-8', # 明确指定编码为UTF-8
text=True,
check=False # 不抛出异常,我们会检查返回码
)
# 验证生成的视频文件
if os.path.exists(video_path) and os.path.getsize(video_path) > 0:
with VideoFileClip(video_path) as clip:
if clip.duration > 0 and clip.fps > 0:
return {timestamp: video_path}
raise ValueError("视频文件验证失败")
except Exception as e:
logger.warning(f"视频文件处理失败: {video_path} => {str(e)}")
else:
process = subprocess.run(
ffmpeg_cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
check=False # 不抛出异常,我们会检查返回码
)
# 检查是否成功
if process.returncode != 0:
logger.error(f"视频剪辑失败: {process.stderr}")
if os.path.exists(video_path):
os.remove(video_path)
except Exception as e:
logger.warning(f"视频剪辑失败: \n{str(traceback.format_exc())}")
return ''
# 验证生成的视频文件
if os.path.exists(video_path) and os.path.getsize(video_path) > 0:
# 检查视频是否可播放
probe_cmd = ["ffprobe", "-v", "error", video_path]
# 在Windows系统上使用UTF-8编码
if is_windows:
validate_result = subprocess.run(probe_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding='utf-8')
else:
validate_result = subprocess.run(probe_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
if validate_result.returncode == 0:
logger.info(f"视频剪辑成功: {video_path}")
return video_path
logger.error("视频文件验证失败")
if os.path.exists(video_path):
os.remove(video_path)
finally:
# 确保视频对象被正确关闭
try:
video.close()
if 'subclip' in locals():
subclip.close()
except:
pass
return {}
return ''
except Exception as e:
logger.error(f"视频剪辑过程中发生错误: \n{str(traceback.format_exc())}")
if os.path.exists(video_path):
os.remove(video_path)
return ''
def clip_videos(task_id: str, timestamp_terms: List[str], origin_video: str, progress_callback=None) -> dict:
@ -428,17 +508,17 @@ def clip_videos(task_id: str, timestamp_terms: List[str], origin_video: str, pro
try:
saved_video_path = save_clip_video(timestamp=item, origin_video=origin_video, save_dir=material_directory)
if saved_video_path:
logger.info(f"video saved: {saved_video_path}")
video_paths.update(saved_video_path)
video_paths.update({index+1:saved_video_path})
# 更新进度
if progress_callback:
progress_callback(index + 1, total_items)
except Exception as e:
logger.error(f"视频裁剪失败: {utils.to_json(item)} =>\n{str(traceback.format_exc())}")
return {}
logger.success(f"裁剪 {len(video_paths)} videos")
# logger.debug(json.dumps(video_paths, indent=4, ensure_ascii=False))
return video_paths

View File

@ -0,0 +1,673 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
'''
@Project: NarratoAI
@File : merger_video
@Author : 小林同学
@Date : 2025/5/6 下午7:38
'''
import os
import shutil
import subprocess
from enum import Enum
from typing import List, Optional, Tuple
from loguru import logger
from app.utils import ffmpeg_utils
class VideoAspect(Enum):
    """Supported video aspect ratios with their standard output resolutions."""
    landscape = "16:9"    # landscape 16:9
    landscape_2 = "4:3"   # landscape 4:3
    portrait = "9:16"     # portrait 9:16
    portrait_2 = "3:4"    # portrait 3:4
    square = "1:1"        # square 1:1

    def to_resolution(self) -> Tuple[int, int]:
        """Return the standard (width, height) in pixels for this ratio."""
        resolutions = {
            VideoAspect.portrait: (1080, 1920),     # portrait 9:16
            VideoAspect.portrait_2: (720, 1280),    # portrait 3:4 output size
            VideoAspect.landscape: (1920, 1080),    # landscape 16:9
            VideoAspect.landscape_2: (1280, 720),   # landscape 4:3 output size
            VideoAspect.square: (1080, 1080),       # square 1:1
        }
        # Default to portrait 1080x1920 for any unmapped member.
        return resolutions.get(self, (1080, 1920))
def check_ffmpeg_installation() -> bool:
    """Return True when an `ffmpeg` binary is runnable from PATH.

    Probes by running `ffmpeg -version`; any launch or non-zero-exit failure
    is treated as "not installed" and logged.

    Returns:
        bool: True if ffmpeg ran successfully, False otherwise.
    """
    try:
        subprocess.run(
            ['ffmpeg', '-version'],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=True,
        )
    except (subprocess.SubprocessError, FileNotFoundError):
        logger.error("ffmpeg未安装或不在系统PATH中请安装ffmpeg")
        return False
    return True
def get_hardware_acceleration_option() -> Optional[str]:
    """Pick the hardware-acceleration option suited to this system.

    Returns:
        Optional[str]: hwaccel parameter from the shared detection API,
        or None when hardware acceleration is unsupported.
    """
    # Query the newer centralized detection API and hand back its result.
    hwaccel_type = ffmpeg_utils.get_ffmpeg_hwaccel_type()
    return hwaccel_type
def check_video_has_audio(video_path: str) -> bool:
    """Return True when the video file contains an audio stream.

    Uses ffprobe to inspect the first audio stream; a missing file or any
    probing error yields False.

    Args:
        video_path: path of the video file to inspect.

    Returns:
        bool: True if an audio stream is present, False otherwise.
    """
    if not os.path.exists(video_path):
        logger.warning(f"视频文件不存在: {video_path}")
        return False
    cmd = [
        'ffprobe', '-v', 'error',
        '-select_streams', 'a:0',
        '-show_entries', 'stream=codec_type',
        '-of', 'csv=p=0',
        video_path
    ]
    try:
        probe = subprocess.run(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            check=False,
        )
    except Exception as e:
        logger.warning(f"检测视频音频流时出错: {str(e)}")
        return False
    # ffprobe prints 'audio' on stdout when the first audio stream exists.
    return probe.stdout.strip() == 'audio'
def create_ffmpeg_concat_file(video_paths: List[str], concat_file_path: str) -> str:
    """Write the list file required by ffmpeg's concat demuxer.

    Each video path is converted to an absolute path, normalized per platform
    (forward slashes on Windows; backslash/colon escaping on Unix), with
    single quotes escaped, and emitted as a `file '...'` line.

    Args:
        video_paths: paths of the videos to concatenate, in order.
        concat_file_path: where to write the concat list file.

    Returns:
        str: the concat file path (same as concat_file_path).
    """
    def _format_entry(path: str) -> str:
        # Use the absolute path so ffmpeg resolves entries regardless of cwd.
        abs_path = os.path.abspath(path)
        if os.name == 'nt':  # Windows: forward slashes avoid escape issues
            abs_path = abs_path.replace('\\', '/')
        else:  # Unix/Mac: escape backslashes and colons
            abs_path = abs_path.replace('\\', '\\\\').replace(':', '\\:')
        # Escape any single quotes inside the path.
        abs_path = abs_path.replace("'", "\\'")
        return f"file '{abs_path}'\n"

    with open(concat_file_path, 'w', encoding='utf-8') as f:
        f.writelines(_format_entry(p) for p in video_paths)
    return concat_file_path
def process_single_video(
    input_path: str,
    output_path: str,
    target_width: int,
    target_height: int,
    keep_audio: bool = True,
    hwaccel: Optional[str] = None
) -> str:
    """
    Process a single video: scale/pad it to the target resolution and set a
    fixed 30 fps frame rate, optionally keeping its audio track.

    Encoding strategy: try hardware acceleration (when requested and deemed
    safe), fall back to plain libx264, and finally to a minimal ultrafast/CRF
    command if both fail.

    Args:
        input_path: input video path
        output_path: output video path
        target_width: target width in pixels
        target_height: target height in pixels
        keep_audio: whether to keep the audio track
        hwaccel: hardware-acceleration option (None disables it)
    Returns:
        str: path of the processed video
    Raises:
        FileNotFoundError: if the input file does not exist.
        RuntimeError: if every encoding strategy fails.
    """
    if not os.path.exists(input_path):
        raise FileNotFoundError(f"找不到视频文件: {input_path}")
    # Base ffmpeg command; -y overwrites any existing output.
    command = ['ffmpeg', '-y']
    # Safety check: on Windows, re-validate the input before trusting hwaccel.
    is_windows = os.name == 'nt'
    if is_windows and hwaccel:
        logger.info("在Windows系统上检测到硬件加速请求将进行额外的兼容性检查")
        try:
            # Quick probe of the input's basic video properties.
            probe_cmd = [
                'ffprobe', '-v', 'error',
                '-select_streams', 'v:0',
                '-show_entries', 'stream=codec_name,width,height',
                '-of', 'csv=p=0',
                input_path
            ]
            result = subprocess.run(probe_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=False)
            # If probing fails, degrade to software encoding for safety.
            if result.returncode != 0:
                logger.warning(f"视频探测失败,为安全起见,禁用硬件加速: {result.stderr}")
                hwaccel = None
        except Exception as e:
            logger.warning(f"视频探测出错,禁用硬件加速: {str(e)}")
            hwaccel = None
    # Append hwaccel input flags via the centralised detection API.
    if hwaccel:
        try:
            hwaccel_args = ffmpeg_utils.get_ffmpeg_hwaccel_args()
            if hwaccel_args:
                command.extend(hwaccel_args)
                logger.debug(f"应用硬件加速参数: {hwaccel_args}")
            else:
                logger.info("硬件加速不可用,将使用软件编码")
                hwaccel = False  # mark hardware acceleration as disabled
        except Exception as e:
            logger.warning(f"应用硬件加速参数时出错: {str(e)},将使用软件编码")
            hwaccel = False  # mark hardware acceleration as disabled
            # Rebuild the command to drop any partially-added hwaccel flags.
            command = ['ffmpeg', '-y']
    # Input file.
    command.extend(['-i', input_path])
    # Audio handling.
    if not keep_audio:
        command.extend(['-an'])  # strip audio
    else:
        # Only request audio encoding when an audio stream actually exists.
        has_audio = check_video_has_audio(input_path)
        if has_audio:
            command.extend(['-c:a', 'aac', '-b:a', '128k'])  # encode audio as AAC
        else:
            logger.warning(f"视频 {input_path} 没有音频流,将会忽略音频设置")
            command.extend(['-an'])  # no audio stream: strip audio settings
    # Video filters: scale down preserving aspect ratio, then pad to target.
    scale_filter = f"scale={target_width}:{target_height}:force_original_aspect_ratio=decrease"
    pad_filter = f"pad={target_width}:{target_height}:(ow-iw)/2:(oh-ih)/2"
    command.extend([
        '-vf', f"{scale_filter},{pad_filter}",
        '-r', '30',  # normalise frame rate to 30 fps
    ])
    # Encoder selection via the smart encoder-detection helper.
    encoder = ffmpeg_utils.get_optimal_ffmpeg_encoder()
    if hwaccel and encoder != "libx264":
        logger.info(f"使用硬件编码器: {encoder}")
        command.extend(['-c:v', encoder])
        # Encoder-family specific tuning flags.
        if "nvenc" in encoder:
            command.extend(['-preset', 'p4', '-profile:v', 'high'])
        elif "videotoolbox" in encoder:
            command.extend(['-profile:v', 'high'])
        elif "qsv" in encoder:
            command.extend(['-preset', 'medium'])
        elif "vaapi" in encoder:
            command.extend(['-profile', '100'])
        elif "amf" in encoder:
            command.extend(['-quality', 'balanced'])
        else:
            command.extend(['-preset', 'medium', '-profile:v', 'high'])
    else:
        logger.info("使用软件编码器(libx264)")
        command.extend(['-c:v', 'libx264', '-preset', 'medium', '-profile:v', 'high'])
    # Bitrate / pixel-format settings shared by all encoders.
    command.extend([
        '-b:v', '5M',
        '-maxrate', '8M',
        '-bufsize', '10M',
        '-pix_fmt', 'yuv420p',  # most broadly compatible pixel format
    ])
    # Output file.
    command.append(output_path)
    # Execute; on failure, fall back progressively to safer encodings.
    try:
        # logger.info(f"执行FFmpeg命令: {' '.join(command)}")
        process = subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        logger.info(f"视频处理成功: {output_path}")
        return output_path
    except subprocess.CalledProcessError as e:
        error_msg = e.stderr.decode() if e.stderr else str(e)
        logger.error(f"处理视频失败: {error_msg}")
        # Fallback 1: retry with pure software encoding when hwaccel was used.
        if hwaccel:
            logger.info("硬件加速失败,尝试使用软件编码作为备选方案")
            try:
                # Force software encoding globally for subsequent calls.
                ffmpeg_utils.force_software_encoding()
                # Rebuild the command from scratch with libx264.
                fallback_cmd = ['ffmpeg', '-y', '-i', input_path]
                # Reuse the original audio decision.
                if not keep_audio:
                    fallback_cmd.extend(['-an'])
                else:
                    has_audio = check_video_has_audio(input_path)
                    if has_audio:
                        fallback_cmd.extend(['-c:a', 'aac', '-b:a', '128k'])
                    else:
                        fallback_cmd.extend(['-an'])
                # Reuse the original video filters and rate settings.
                fallback_cmd.extend([
                    '-vf', f"{scale_filter},{pad_filter}",
                    '-r', '30',
                    '-c:v', 'libx264',
                    '-preset', 'medium',
                    '-profile:v', 'high',
                    '-b:v', '5M',
                    '-maxrate', '8M',
                    '-bufsize', '10M',
                    '-pix_fmt', 'yuv420p',
                    output_path
                ])
                logger.info("执行软件编码备选方案")
                subprocess.run(fallback_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                logger.info(f"使用软件编码成功处理视频: {output_path}")
                return output_path
            except subprocess.CalledProcessError as fallback_error:
                fallback_error_msg = fallback_error.stderr.decode() if fallback_error.stderr else str(fallback_error)
                logger.error(f"软件编码备选方案也失败: {fallback_error_msg}")
                # Fallback 2: minimal encode (no scaling/padding) as last resort.
                try:
                    logger.info("尝试最基本的编码参数")
                    basic_cmd = [
                        'ffmpeg', '-y', '-i', input_path,
                        '-c:v', 'libx264', '-preset', 'ultrafast',
                        '-crf', '23', '-pix_fmt', 'yuv420p',
                        output_path
                    ]
                    subprocess.run(basic_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                    logger.info(f"使用基本编码参数成功处理视频: {output_path}")
                    return output_path
                except subprocess.CalledProcessError as basic_error:
                    basic_error_msg = basic_error.stderr.decode() if basic_error.stderr else str(basic_error)
                    logger.error(f"基本编码参数也失败: {basic_error_msg}")
                    raise RuntimeError(f"无法处理视频 {input_path}: 所有编码方案都失败")
        # Not a hwaccel issue, or all fallbacks exhausted: surface original error.
        raise RuntimeError(f"处理视频失败: {error_msg}")
def combine_clip_videos(
    output_video_path: str,
    video_paths: List[str],
    video_ost_list: List[int],
    video_aspect: VideoAspect = VideoAspect.portrait,
    threads: int = 4,
    force_software_encoding: bool = False,  # force software encoding, skipping hardware-acceleration detection
) -> str:
    """
    Merge pre-cut sub-videos into a single output video.

    Pipeline: (1) normalise every clip to the target resolution with
    process_single_video; (2) concat the video streams (audio stripped);
    (3) extract each kept audio track, delay it to its position on the merged
    timeline, mix all tracks over a silent base, and mux the mix back onto the
    concat video. If the audio pipeline fails, falls back to a plain
    audio-less concat.

    Args:
        output_video_path: path of the merged output video
        video_paths: list of sub-video paths
        video_ost_list: original-sound flags (0: drop original audio,
            1: keep original audio only, 2: keep original audio and narration)
        video_aspect: target aspect ratio
        threads: ffmpeg thread count for the concat step
        force_software_encoding: force software encoding, skipping
            hardware-acceleration detection
    Returns:
        str: path of the merged video
    Raises:
        RuntimeError: if ffmpeg is missing or every merge strategy fails.
        ValueError: if no valid clip survives preprocessing.
    """
    # Fail fast when ffmpeg is unavailable.
    if not check_ffmpeg_installation():
        raise RuntimeError("未找到ffmpeg请先安装")
    # Ensure the output directory exists.
    output_dir = os.path.dirname(output_video_path)
    os.makedirs(output_dir, exist_ok=True)
    # Resolve the target resolution from the aspect ratio.
    aspect = VideoAspect(video_aspect)
    video_width, video_height = aspect.to_resolution()
    # Detect available hardware acceleration unless explicitly forced off.
    hwaccel = None if force_software_encoding else get_hardware_acceleration_option()
    if hwaccel:
        logger.info(f"将使用 {hwaccel} 硬件加速")
    elif force_software_encoding:
        logger.info("已强制使用软件编码,跳过硬件加速检测")
    else:
        logger.info("未检测到兼容的硬件加速,将使用软件编码")
    # On Windows keep hwaccel but warn; process_single_video re-validates per clip.
    if os.name == 'nt' and hwaccel:
        logger.warning("在Windows系统上检测到硬件加速但为了提高兼容性建议使用软件编码")
        # Do not force-disable hwaccel here; an extra safety probe runs in process_single_video.
    # Re-group clip paths and original-sound flags into per-clip config dicts.
    video_segments = []
    # Guard against mismatched list lengths.
    if len(video_paths) != len(video_ost_list):
        logger.warning(f"视频路径列表({len(video_paths)})和原声设置列表({len(video_ost_list)})长度不匹配")
        # Truncate both lists to the shorter one.
        min_length = min(len(video_paths), len(video_ost_list))
        video_paths = video_paths[:min_length]
        video_ost_list = video_ost_list[:min_length]
    # Build one processing config per existing clip.
    for i, (video_path, video_ost) in enumerate(zip(video_paths, video_ost_list)):
        if not os.path.exists(video_path):
            logger.warning(f"视频不存在,跳过: {video_path}")
            continue
        # Probe whether the clip actually carries an audio stream.
        has_audio = check_video_has_audio(video_path)
        # Per-clip configuration.
        segment = {
            "index": i,
            "path": video_path,
            "ost": video_ost,
            "has_audio": has_audio,
            "keep_audio": video_ost > 0 and has_audio  # keep audio only if requested AND present
        }
        # Warn when audio was requested but the clip has none.
        if video_ost > 0 and not has_audio:
            logger.warning(f"视频 {video_path} 设置为保留原声(ost={video_ost}),但该视频没有音频流")
        video_segments.append(segment)
    # Intermediates live in a scratch directory, removed in the finally block.
    processed_videos = []
    temp_dir = os.path.join(output_dir, "temp_videos")
    os.makedirs(temp_dir, exist_ok=True)
    try:
        # Phase 1: normalise every clip into an intermediate file.
        for segment in video_segments:
            # Process one clip, keeping or dropping its audio per config.
            temp_output = os.path.join(temp_dir, f"processed_{segment['index']}.mp4")
            try:
                process_single_video(
                    input_path=segment['path'],
                    output_path=temp_output,
                    target_width=video_width,
                    target_height=video_height,
                    keep_audio=segment['keep_audio'],
                    hwaccel=hwaccel
                )
                processed_videos.append({
                    "index": segment["index"],
                    "path": temp_output,
                    "keep_audio": segment["keep_audio"]
                })
                logger.info(f"视频 {segment['index'] + 1}/{len(video_segments)} 处理完成")
            except Exception as e:
                logger.error(f"处理视频 {segment['path']} 时出错: {str(e)}")
                # Retry with software encoding when hwaccel may be at fault.
                if hwaccel and not force_software_encoding:
                    logger.info(f"尝试使用软件编码处理视频 {segment['path']}")
                    try:
                        process_single_video(
                            input_path=segment['path'],
                            output_path=temp_output,
                            target_width=video_width,
                            target_height=video_height,
                            keep_audio=segment['keep_audio'],
                            hwaccel=None  # software encoding
                        )
                        processed_videos.append({
                            "index": segment["index"],
                            "path": temp_output,
                            "keep_audio": segment["keep_audio"]
                        })
                        logger.info(f"使用软件编码成功处理视频 {segment['index'] + 1}/{len(video_segments)}")
                    except Exception as fallback_error:
                        logger.error(f"使用软件编码处理视频 {segment['path']} 也失败: {str(fallback_error)}")
                        continue
                else:
                    continue
        if not processed_videos:
            raise ValueError("没有有效的视频片段可以合并")
        # Restore the original clip order.
        processed_videos.sort(key=lambda x: x["index"])
        # Phase 2: merge step-by-step, avoiding a monolithic filter_complex graph.
        try:
            # 1. Concat all video streams, audio stripped, into one file.
            video_paths_only = [video["path"] for video in processed_videos]
            video_concat_path = os.path.join(temp_dir, "video_concat.mp4")
            # Concat list file for ffmpeg's concat demuxer.
            concat_file = os.path.join(temp_dir, "concat_list.txt")
            create_ffmpeg_concat_file(video_paths_only, concat_file)
            # Merge video streams without audio.
            concat_cmd = [
                'ffmpeg', '-y',
                '-f', 'concat',
                '-safe', '0',
                '-i', concat_file,
                '-c:v', 'libx264',
                '-preset', 'medium',
                '-profile:v', 'high',
                '-an',  # no audio
                '-threads', str(threads),
                video_concat_path
            ]
            subprocess.run(concat_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            logger.info("视频流合并完成")
            # 2. Extract audio from clips that keep their original sound.
            audio_segments = [video for video in processed_videos if video["keep_audio"]]
            if not audio_segments:
                # No audio anywhere: the silent concat IS the final result.
                shutil.copy(video_concat_path, output_video_path)
                logger.info("无音频视频合并完成")
                return output_video_path
            # Extract each kept audio track to an intermediate AAC file.
            audio_files = []
            for i, segment in enumerate(audio_segments):
                # Extract audio only.
                audio_file = os.path.join(temp_dir, f"audio_{i}.aac")
                extract_audio_cmd = [
                    'ffmpeg', '-y',
                    '-i', segment["path"],
                    '-vn',  # no video
                    '-c:a', 'aac',
                    '-b:a', '128k',
                    audio_file
                ]
                subprocess.run(extract_audio_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                audio_files.append({
                    "index": segment["index"],
                    "path": audio_file
                })
                logger.info(f"提取音频 {i+1}/{len(audio_segments)} 完成")
            # 3. Compute each audio segment's start offset on the merged timeline.
            audio_timings = []
            current_time = 0.0
            # Probe every processed clip's duration to accumulate offsets.
            for i, video in enumerate(processed_videos):
                duration_cmd = [
                    'ffprobe', '-v', 'error',
                    '-show_entries', 'format=duration',
                    '-of', 'csv=p=0',
                    video["path"]
                ]
                result = subprocess.run(duration_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
                duration = float(result.stdout.strip())
                # Record the timeline position of clips that keep audio.
                if video["keep_audio"]:
                    for audio in audio_files:
                        if audio["index"] == video["index"]:
                            audio_timings.append({
                                "file": audio["path"],
                                "start": current_time,
                                "index": video["index"]
                            })
                            break
                current_time += duration
            # 4. Create a silent base audio track spanning the whole timeline.
            silence_audio = os.path.join(temp_dir, "silence.aac")
            create_silence_cmd = [
                'ffmpeg', '-y',
                '-f', 'lavfi',
                '-i', f'anullsrc=r=44100:cl=stereo',
                '-t', str(current_time),  # total timeline duration
                '-c:a', 'aac',
                '-b:a', '128k',
                silence_audio
            ]
            subprocess.run(create_silence_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            # 5. Build a filter script that delays each track and mixes them all.
            filter_script = os.path.join(temp_dir, "filter_script.txt")
            with open(filter_script, 'w') as f:
                f.write(f"[0:a]volume=0.0[silence];\n")  # mute the base track
                # adelay each extracted track (both channels) to its offset.
                for i, timing in enumerate(audio_timings):
                    f.write(f"[{i+1}:a]adelay={int(timing['start']*1000)}|{int(timing['start']*1000)}[a{i}];\n")
                # amix the silent base with every delayed track.
                mix_str = "[silence]"
                for i in range(len(audio_timings)):
                    mix_str += f"[a{i}]"
                mix_str += f"amix=inputs={len(audio_timings)+1}:duration=longest[aout]"
                f.write(mix_str)
            # 6. Run the audio mix command.
            audio_inputs = ['-i', silence_audio]
            for timing in audio_timings:
                audio_inputs.extend(['-i', timing["file"]])
            mixed_audio = os.path.join(temp_dir, "mixed_audio.aac")
            audio_mix_cmd = [
                'ffmpeg', '-y'
            ] + audio_inputs + [
                '-filter_complex_script', filter_script,
                '-map', '[aout]',
                '-c:a', 'aac',
                '-b:a', '128k',
                mixed_audio
            ]
            subprocess.run(audio_mix_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            logger.info("音频混合完成")
            # 7. Mux the concat video stream with the mixed audio track.
            final_cmd = [
                'ffmpeg', '-y',
                '-i', video_concat_path,
                '-i', mixed_audio,
                '-c:v', 'copy',
                '-c:a', 'aac',
                '-map', '0:v:0',
                '-map', '1:a:0',
                '-shortest',
                output_video_path
            ]
            subprocess.run(final_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            logger.info("视频最终合并完成")
            return output_video_path
        except subprocess.CalledProcessError as e:
            logger.error(f"合并视频过程中出错: {e.stderr.decode() if e.stderr else str(e)}")
            # Last-resort fallback: simplest possible concat, no audio.
            logger.info("尝试备用合并方法 - 无音频合并")
            try:
                concat_file = os.path.join(temp_dir, "concat_list.txt")
                video_paths_only = [video["path"] for video in processed_videos]
                create_ffmpeg_concat_file(video_paths_only, concat_file)
                backup_cmd = [
                    'ffmpeg', '-y',
                    '-f', 'concat',
                    '-safe', '0',
                    '-i', concat_file,
                    '-c:v', 'copy',
                    '-an',  # no audio
                    output_video_path
                ]
                subprocess.run(backup_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                logger.warning("使用备用方法(无音频)成功合并视频")
                return output_video_path
            except Exception as backup_error:
                logger.error(f"备用合并方法也失败: {str(backup_error)}")
                raise RuntimeError(f"无法合并视频: {str(backup_error)}")
    except Exception as e:
        logger.error(f"合并视频时出错: {str(e)}")
        raise
    finally:
        # Always remove intermediates, even on failure.
        try:
            if os.path.exists(temp_dir):
                shutil.rmtree(temp_dir)
                logger.info("已清理临时文件")
        except Exception as e:
            logger.warning(f"清理临时文件时出错: {str(e)}")
if __name__ == '__main__':
    # Manual smoke test: merge a handful of pre-cut clips into one portrait
    # video, keeping the original audio of every clip.
    clip_files = [
        '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/S01E02_00_14_09_440.mp4',
        '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/S01E08_00_27_11_110.mp4',
        '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/S01E08_00_34_44_480.mp4',
        '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/S01E08_00_42_47_630.mp4',
        '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/S01E09_00_29_48_160.mp4',
    ]
    combine_clip_videos(
        output_video_path="/Users/apple/Desktop/home/NarratoAI/storage/temp/merge/merged_123.mp4",
        video_paths=clip_files,
        video_ost_list=[1, 1, 1, 1, 1],
        video_aspect=VideoAspect.portrait,
        force_software_encoding=False,  # let the system decide; do not force software encoding
    )

View File

@ -3,10 +3,11 @@ import json
import time
import asyncio
import requests
from app.utils import video_processor
from loguru import logger
from typing import List, Dict, Any, Callable
from app.utils import utils, gemini_analyzer, video_processor, video_processor_v2
from app.utils import utils, gemini_analyzer, video_processor
from app.utils.script_generator import ScriptProcessor
from app.config import config
@ -21,6 +22,7 @@ class ScriptGenerator:
video_path: str,
video_theme: str = "",
custom_prompt: str = "",
frame_interval_input: int = 5,
skip_seconds: int = 0,
threshold: int = 30,
vision_batch_size: int = 5,
@ -105,20 +107,13 @@ class ScriptGenerator:
os.makedirs(video_keyframes_dir, exist_ok=True)
try:
if config.frames.get("version") == "v2":
processor = video_processor_v2.VideoProcessor(video_path)
processor.process_video_pipeline(
output_dir=video_keyframes_dir,
skip_seconds=skip_seconds,
threshold=threshold
)
else:
processor = video_processor.VideoProcessor(video_path)
processor.process_video(
output_dir=video_keyframes_dir,
skip_seconds=skip_seconds
)
processor = video_processor.VideoProcessor(video_path)
processor.process_video_pipeline(
output_dir=video_keyframes_dir,
skip_seconds=skip_seconds,
threshold=threshold
)
for filename in sorted(os.listdir(video_keyframes_dir)):
if filename.endswith('.jpg'):
keyframe_files.append(os.path.join(video_keyframes_dir, filename))

View File

@ -4,11 +4,11 @@ import re
import traceback
from typing import Optional
from faster_whisper import WhisperModel
# from faster_whisper import WhisperModel
from timeit import default_timer as timer
from loguru import logger
import google.generativeai as genai
from moviepy.editor import VideoFileClip
from moviepy import VideoFileClip
import os
from app.config import config
@ -33,7 +33,7 @@ def create(audio_file, subtitle_file: str = ""):
"""
global model, device, compute_type
if not model:
model_path = f"{utils.root_dir()}/app/models/faster-whisper-large-v2"
model_path = f"{utils.root_dir()}/app/models/faster-whisper-large-v3"
model_bin_file = f"{model_path}/model.bin"
if not os.path.isdir(model_path) or not os.path.isfile(model_bin_file):
logger.error(
@ -45,12 +45,25 @@ def create(audio_file, subtitle_file: str = ""):
)
return None
# 尝试使用 CUDA如果失败则回退到 CPU
# 首先使用CPU模式不触发CUDA检查
use_cuda = False
try:
import torch
if torch.cuda.is_available():
# 在函数中延迟导入torch而不是在全局范围内
# 使用安全的方式检查CUDA可用性
def check_cuda_available():
try:
import torch
return torch.cuda.is_available()
except (ImportError, RuntimeError) as e:
logger.warning(f"检查CUDA可用性时出错: {e}")
return False
# 仅当明确需要时才检查CUDA
use_cuda = check_cuda_available()
if use_cuda:
logger.info(f"尝试使用 CUDA 加载模型: {model_path}")
try:
logger.info(f"尝试使用 CUDA 加载模型: {model_path}")
model = WhisperModel(
model_size_or_path=model_path,
device="cuda",
@ -63,18 +76,18 @@ def create(audio_file, subtitle_file: str = ""):
except Exception as e:
logger.warning(f"CUDA 加载失败,错误信息: {str(e)}")
logger.warning("回退到 CPU 模式")
device = "cpu"
compute_type = "int8"
use_cuda = False
else:
logger.info("未检测到 CUDA使用 CPU 模式")
device = "cpu"
compute_type = "int8"
except ImportError:
logger.warning("未安装 torch使用 CPU 模式")
logger.info("使用 CPU 模式")
except Exception as e:
logger.warning(f"CUDA检查过程出错: {e}")
logger.warning("默认使用CPU模式")
use_cuda = False
# 如果CUDA不可用或加载失败使用CPU
if not use_cuda:
device = "cpu"
compute_type = "int8"
if device == "cpu":
logger.info(f"使用 CPU 加载模型: {model_path}")
model = WhisperModel(
model_size_or_path=model_path,
@ -403,7 +416,7 @@ def extract_audio_and_create_subtitle(video_file: str, subtitle_file: str = "")
logger.info("音频提取完成,开始生成字幕")
# 使用create函数生成字幕
create(audio_file, subtitle_file)
create("/Users/apple/Desktop/WhisperX-zhuanlu/1_qyn2-2_Vocals.wav", subtitle_file)
# 删除临时音频文件
if os.path.exists(audio_file):
@ -422,8 +435,8 @@ if __name__ == "__main__":
task_id = "123456"
task_dir = utils.task_dir(task_id)
subtitle_file = f"{task_dir}/subtitle_123456.srt"
audio_file = f"{task_dir}/audio.wav"
video_file = "/Users/apple/Desktop/home/NarratoAI/resource/videos/merged_video_1702.mp4"
audio_file = "/Users/apple/Desktop/WhisperX-zhuanlu/1_qyn2-2_Vocals.wav"
video_file = "/Users/apple/Desktop/home/NarratoAI/storage/temp/merge/qyn2-2-720p.mp4"
extract_audio_and_create_subtitle(video_file, subtitle_file)

View File

@ -0,0 +1,202 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
'''
@Project: NarratoAI
@File : subtitle_merger
@Author : viccy
@Date : 2025/5/6 下午4:00
'''
import re
import os
from datetime import datetime, timedelta
def parse_time(time_str):
    """Parse an SRT timestamp ('HH:MM:SS,mmm') into a timedelta."""
    clock_part, ms_part = time_str.split(',')
    h, m, s = clock_part.split(':')
    return timedelta(
        hours=int(h),
        minutes=int(m),
        seconds=int(s),
        milliseconds=int(ms_part),
    )
def format_time(td):
    """Format a timedelta as an SRT timestamp string 'HH:MM:SS,mmm'."""
    whole_seconds = int(td.total_seconds())
    ms = td.microseconds // 1000
    h, remainder = divmod(whole_seconds, 3600)
    m, s = divmod(remainder, 60)
    return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"
def parse_edited_time_range(time_range_str):
    """Split an 'HH:MM:SS-HH:MM:SS' range string into (start, end) timedeltas.

    Returns (None, None) when the string is empty or does not contain exactly
    one '-' separator.
    """
    if not time_range_str:
        return None, None
    pieces = time_range_str.split('-')
    if len(pieces) != 2:
        return None, None

    def _to_timedelta(text):
        # Convert an HH:MM:SS string to a timedelta.
        h, m, s = map(int, text.split(':'))
        return timedelta(hours=h, minutes=m, seconds=s)

    return _to_timedelta(pieces[0]), _to_timedelta(pieces[1])
def merge_subtitle_files(subtitle_items, output_file=None):
    """
    Merge multiple SRT subtitle files into one, shifting each file's
    timestamps by the start of its 'editedTimeRange' so they land at the
    right position on the merged timeline.

    Args:
        subtitle_items: list of dicts, each holding a 'subtitle' file path
            and an 'editedTimeRange' string ("HH:MM:SS-HH:MM:SS").
        output_file: output path for the merged file; auto-generated next to
            the first input when None.
    Returns:
        Path of the merged subtitle file.
    """
    # Sort items by the start time of their editedTimeRange.
    # NOTE(review): assumes subtitle_items is non-empty when output_file is
    # None - sorted_items[0] below would raise IndexError otherwise; confirm
    # callers guarantee this.
    sorted_items = sorted(subtitle_items,
                          key=lambda x: parse_edited_time_range(x.get('editedTimeRange', ''))[0] or timedelta())

    merged_subtitles = []
    subtitle_index = 1

    for item in sorted_items:
        # Skip items with a missing or nonexistent subtitle file.
        if not item.get('subtitle') or not os.path.exists(item.get('subtitle')):
            continue

        # The start of editedTimeRange is the offset applied to every cue.
        offset_time, _ = parse_edited_time_range(item.get('editedTimeRange', ''))
        if offset_time is None:
            print(f"警告: 无法从项目 {item.get('_id')} 的editedTimeRange中提取时间范围跳过该项")
            continue

        with open(item['subtitle'], 'r', encoding='utf-8') as file:
            content = file.read()

        # SRT cues are separated by blank lines.
        subtitle_blocks = re.split(r'\n\s*\n', content.strip())

        for block in subtitle_blocks:
            lines = block.strip().split('\n')
            if len(lines) < 3:  # need index line, timing line, and at least one text line
                continue

            # Second line of a cue is the timing line.
            time_line = lines[1]
            time_parts = time_line.split(' --> ')
            if len(time_parts) != 2:
                continue

            start_time = parse_time(time_parts[0])
            end_time = parse_time(time_parts[1])

            # Shift the cue onto the merged timeline.
            adjusted_start_time = start_time + offset_time
            adjusted_end_time = end_time + offset_time

            # Rebuild the cue with a fresh sequential index.
            adjusted_time_line = f"{format_time(adjusted_start_time)} --> {format_time(adjusted_end_time)}"
            text_lines = lines[2:]
            new_block = [
                str(subtitle_index),
                adjusted_time_line,
                *text_lines
            ]

            merged_subtitles.append('\n'.join(new_block))
            subtitle_index += 1

    # Derive the output path from the overall covered time range when not given.
    if output_file is None:
        dir_path = os.path.dirname(sorted_items[0]['subtitle'])
        first_start = parse_edited_time_range(sorted_items[0]['editedTimeRange'])[0]
        last_end = parse_edited_time_range(sorted_items[-1]['editedTimeRange'])[1]
        # NOTE(review): uses td.seconds, which drops any whole-day component -
        # presumably ranges never reach 24h; confirm.
        first_start_h, first_start_m, first_start_s = int(first_start.seconds // 3600), int((first_start.seconds % 3600) // 60), int(first_start.seconds % 60)
        last_end_h, last_end_m, last_end_s = int(last_end.seconds // 3600), int((last_end.seconds % 3600) // 60), int(last_end.seconds % 60)
        first_start_str = f"{first_start_h:02d}_{first_start_m:02d}_{first_start_s:02d}"
        last_end_str = f"{last_end_h:02d}_{last_end_m:02d}_{last_end_s:02d}"
        output_file = os.path.join(dir_path, f"merged_subtitle_{first_start_str}-{last_end_str}.srt")

    # Join all cues with blank-line separators.
    merged_content = '\n\n'.join(merged_subtitles)

    with open(output_file, 'w', encoding='utf-8') as file:
        file.write(merged_content)

    return output_file
if __name__ == '__main__':
    # Manual smoke test: fixtures mirroring the script-generation pipeline's
    # segment schema (picture/narration/OST/audio/subtitle/time-range fields).
    # Only 'subtitle' and 'editedTimeRange' are consumed by merge_subtitle_files.
    test_data = [
        {'picture': '【解说】好的,各位,欢迎回到我的频道!《庆余年 2》刚开播就给了我们一个王炸范闲在北齐""了?这怎么可能!',
         'timestamp': '00:00:00-00:01:15',
         'narration': '好的各位,欢迎回到我的频道!《庆余年 2》刚开播就给了我们一个王炸范闲在北齐""了?这怎么可能!上集片尾那个巨大的悬念,这一集就立刻揭晓了!范闲假死归来,他面临的第一个,也是最大的难关,就是如何面对他最敬爱的,同时也是最可怕的那个人——庆帝!',
         'OST': 0,
         '_id': 1,
         'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_00_00-00_01_15.mp3',
         'subtitle': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_00_00-00_01_15.srt',
         'sourceTimeRange': '00:00:00-00:00:26',
         'duration': 26,
         'editedTimeRange': '00:00:00-00:00:26'
         },
        {'picture': '【解说】上一集我们看到,范闲在北齐遭遇了惊天变故,生死不明!',
         'timestamp': '00:01:15-00:04:40',
         'narration': '但我们都知道,他绝不可能就这么轻易退场!第二集一开场,范闲就已经秘密回到了京都。他的生死传闻,可不像我们想象中那样只是小范围流传,而是…',
         'OST': 0,
         '_id': 2,
         'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_01_15-00_04_40.mp3',
         'subtitle': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_01_15-00_04_40.srt',
         'sourceTimeRange': '00:01:15-00:01:29',
         'duration': 14,
         'editedTimeRange': '00:00:26-00:00:40'
         },
        {'picture': '【解说】"欺君之罪"!在封建王朝,这可是抄家灭族的大罪!搁一般人,肯定脚底抹油溜之大吉了。',
         'timestamp': '00:04:58-00:05:45',
         'narration': '"欺君之罪"!在封建王朝,这可是抄家灭族的大罪!搁一般人,肯定脚底抹油溜之大吉了。但范闲是谁啊?他偏要反其道而行之!他竟然决定,直接去见庆帝!冒着天大的风险,用"假死"这个事实去赌庆帝的态度!',
         'OST': 0,
         '_id': 4,
         'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_04_58-00_05_45.mp3',
         'subtitle': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_04_58-00_05_45.srt',
         'sourceTimeRange': '00:04:58-00:05:20',
         'duration': 22,
         'editedTimeRange': '00:00:57-00:01:19'
         },
        {'picture': '【解说】但想见庆帝,哪有那么容易?范闲艺高人胆大,竟然选择了最激进的方式——闯宫!',
         'timestamp': '00:05:45-00:06:00',
         'narration': '但想见庆帝,哪有那么容易?范闲艺高人胆大,竟然选择了最激进的方式——闯宫!',
         'OST': 0,
         '_id': 5,
         'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_05_45-00_06_00.mp3',
         'subtitle': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_05_45-00_06_00.srt',
         'sourceTimeRange': '00:05:45-00:05:53',
         'duration': 8,
         'editedTimeRange': '00:01:19-00:01:27'
         }
    ]
    # Merge the fixture subtitles and report where the result was written.
    output_file = merge_subtitle_files(test_data)
    print(f"字幕文件已合并至: {output_file}")

View File

@ -9,167 +9,177 @@ from loguru import logger
from app.config import config
from app.models import const
from app.models.schema import VideoConcatMode, VideoParams, VideoClipParams
from app.services import llm, material, subtitle, video, voice, audio_merger
from app.services import (llm, material, subtitle, video, voice, audio_merger,
subtitle_merger, clip_video, merger_video, update_script, generate_video)
from app.services import state as sm
from app.utils import utils
def generate_script(task_id, params):
logger.info("\n\n## generating video script")
video_script = params.video_script.strip()
if not video_script:
video_script = llm.generate_script(
video_subject=params.video_subject,
language=params.video_language,
paragraph_number=params.paragraph_number,
)
else:
logger.debug(f"video script: \n{video_script}")
# def generate_script(task_id, params):
# logger.info("\n\n## generating video script")
# video_script = params.video_script.strip()
# if not video_script:
# video_script = llm.generate_script(
# video_subject=params.video_subject,
# language=params.video_language,
# paragraph_number=params.paragraph_number,
# )
# else:
# logger.debug(f"video script: \n{video_script}")
if not video_script:
sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
logger.error("failed to generate video script.")
return None
# if not video_script:
# sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
# logger.error("failed to generate video script.")
# return None
return video_script
# return video_script
def generate_terms(task_id, params, video_script):
logger.info("\n\n## generating video terms")
video_terms = params.video_terms
if not video_terms:
video_terms = llm.generate_terms(
video_subject=params.video_subject, video_script=video_script, amount=5
)
else:
if isinstance(video_terms, str):
video_terms = [term.strip() for term in re.split(r"[,]", video_terms)]
elif isinstance(video_terms, list):
video_terms = [term.strip() for term in video_terms]
else:
raise ValueError("video_terms must be a string or a list of strings.")
# def generate_terms(task_id, params, video_script):
# logger.info("\n\n## generating video terms")
# video_terms = params.video_terms
# if not video_terms:
# video_terms = llm.generate_terms(
# video_subject=params.video_subject, video_script=video_script, amount=5
# )
# else:
# if isinstance(video_terms, str):
# video_terms = [term.strip() for term in re.split(r"[,]", video_terms)]
# elif isinstance(video_terms, list):
# video_terms = [term.strip() for term in video_terms]
# else:
# raise ValueError("video_terms must be a string or a list of strings.")
logger.debug(f"video terms: {utils.to_json(video_terms)}")
# logger.debug(f"video terms: {utils.to_json(video_terms)}")
if not video_terms:
sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
logger.error("failed to generate video terms.")
return None
# if not video_terms:
# sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
# logger.error("failed to generate video terms.")
# return None
return video_terms
# return video_terms
def save_script_data(task_id, video_script, video_terms, params):
script_file = path.join(utils.task_dir(task_id), "script.json")
script_data = {
"script": video_script,
"search_terms": video_terms,
"params": params,
}
# def save_script_data(task_id, video_script, video_terms, params):
# script_file = path.join(utils.task_dir(task_id), "script.json")
# script_data = {
# "script": video_script,
# "search_terms": video_terms,
# "params": params,
# }
with open(script_file, "w", encoding="utf-8") as f:
f.write(utils.to_json(script_data))
# with open(script_file, "w", encoding="utf-8") as f:
# f.write(utils.to_json(script_data))
def generate_audio(task_id, params, video_script):
logger.info("\n\n## generating audio")
audio_file = path.join(utils.task_dir(task_id), "audio.mp3")
sub_maker = voice.tts(
text=video_script,
voice_name=voice.parse_voice_name(params.voice_name),
voice_rate=params.voice_rate,
voice_file=audio_file,
)
if sub_maker is None:
sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
logger.error(
"""failed to generate audio:
1. check if the language of the voice matches the language of the video script.
2. check if the network is available. If you are in China, it is recommended to use a VPN and enable the global traffic mode.
""".strip()
)
return None, None, None
# def generate_audio(task_id, params, video_script):
# logger.info("\n\n## generating audio")
# audio_file = path.join(utils.task_dir(task_id), "audio.mp3")
# sub_maker = voice.tts(
# text=video_script,
# voice_name=voice.parse_voice_name(params.voice_name),
# voice_rate=params.voice_rate,
# voice_file=audio_file,
# )
# if sub_maker is None:
# sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
# logger.error(
# """failed to generate audio:
# 1. check if the language of the voice matches the language of the video script.
# 2. check if the network is available. If you are in China, it is recommended to use a VPN and enable the global traffic mode.
# """.strip()
# )
# return None, None, None
audio_duration = math.ceil(voice.get_audio_duration(sub_maker))
return audio_file, audio_duration, sub_maker
# audio_duration = math.ceil(voice.get_audio_duration(sub_maker))
# return audio_file, audio_duration, sub_maker
def generate_subtitle(task_id, params, video_script, sub_maker, audio_file):
if not params.subtitle_enabled:
return ""
# def generate_subtitle(task_id, params, video_script, sub_maker, audio_file):
# if not params.subtitle_enabled:
# return ""
subtitle_path = path.join(utils.task_dir(task_id), "subtitle111.srt")
subtitle_provider = config.app.get("subtitle_provider", "").strip().lower()
logger.info(f"\n\n## generating subtitle, provider: {subtitle_provider}")
# subtitle_path = path.join(utils.task_dir(task_id), "subtitle111.srt")
# subtitle_provider = config.app.get("subtitle_provider", "").strip().lower()
# logger.info(f"\n\n## generating subtitle, provider: {subtitle_provider}")
subtitle_fallback = False
if subtitle_provider == "edge":
voice.create_subtitle(
text=video_script, sub_maker=sub_maker, subtitle_file=subtitle_path
)
if not os.path.exists(subtitle_path):
subtitle_fallback = True
logger.warning("subtitle file not found, fallback to whisper")
# subtitle_fallback = False
# if subtitle_provider == "edge":
# voice.create_subtitle(
# text=video_script, sub_maker=sub_maker, subtitle_file=subtitle_path
# )
# if not os.path.exists(subtitle_path):
# subtitle_fallback = True
# logger.warning("subtitle file not found, fallback to whisper")
if subtitle_provider == "whisper" or subtitle_fallback:
subtitle.create(audio_file=audio_file, subtitle_file=subtitle_path)
logger.info("\n\n## correcting subtitle")
subtitle.correct(subtitle_file=subtitle_path, video_script=video_script)
# if subtitle_provider == "whisper" or subtitle_fallback:
# subtitle.create(audio_file=audio_file, subtitle_file=subtitle_path)
# logger.info("\n\n## correcting subtitle")
# subtitle.correct(subtitle_file=subtitle_path, video_script=video_script)
subtitle_lines = subtitle.file_to_subtitles(subtitle_path)
if not subtitle_lines:
logger.warning(f"subtitle file is invalid: {subtitle_path}")
return ""
# subtitle_lines = subtitle.file_to_subtitles(subtitle_path)
# if not subtitle_lines:
# logger.warning(f"subtitle file is invalid: {subtitle_path}")
# return ""
return subtitle_path
# return subtitle_path
def get_video_materials(task_id, params, video_terms, audio_duration):
    """Collect video material paths for a task.

    For the "local" source the user-supplied materials are preprocessed into
    clips; for any other source, clips matching the search terms are
    downloaded. On failure the task state is set to FAILED and None is
    returned.

    Args:
        task_id: task identifier used for state updates.
        params: clip parameters (source, materials, aspect, durations, ...).
        video_terms: search terms used when downloading remote videos.
        audio_duration: narration duration in seconds; scaled by
            params.video_count to size the download.

    Returns:
        A list of material paths/URLs, or None on failure.
    """
    if params.video_source == "local":
        logger.info("\n\n## preprocess local materials")
        processed = video.preprocess_video(
            materials=params.video_materials, clip_duration=params.video_clip_duration
        )
        if not processed:
            sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
            logger.error(
                "no valid materials found, please check the materials and try again."
            )
            return None
        return [item.url for item in processed]

    logger.info(f"\n\n## downloading videos from {params.video_source}")
    fetched = material.download_videos(
        task_id=task_id,
        search_terms=video_terms,
        source=params.video_source,
        video_aspect=params.video_aspect,
        video_contact_mode=params.video_concat_mode,
        audio_duration=audio_duration * params.video_count,
        max_clip_duration=params.video_clip_duration,
    )
    if not fetched:
        sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
        logger.error(
            "failed to download videos, maybe the network is not available. if you are in China, please use a VPN."
        )
        return None
    return fetched
# def get_video_materials(task_id, params, video_terms, audio_duration):
# if params.video_source == "local":
# logger.info("\n\n## preprocess local materials")
# materials = video.preprocess_video(
# materials=params.video_materials, clip_duration=params.video_clip_duration
# )
# if not materials:
# sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
# logger.error(
# "no valid materials found, please check the materials and try again."
# )
# return None
# return [material_info.url for material_info in materials]
# else:
# logger.info(f"\n\n## downloading videos from {params.video_source}")
# downloaded_videos = material.download_videos(
# task_id=task_id,
# search_terms=video_terms,
# source=params.video_source,
# video_aspect=params.video_aspect,
# video_contact_mode=params.video_concat_mode,
# audio_duration=audio_duration * params.video_count,
# max_clip_duration=params.video_clip_duration,
# )
# if not downloaded_videos:
# sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
# logger.error(
# "failed to download videos, maybe the network is not available. if you are in China, please use a VPN."
# )
# return None
# return downloaded_videos
def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: dict):
"""后台任务(自动剪辑视频进行剪辑)"""
"""
后台任务自动剪辑视频进行剪辑
Args:
task_id: 任务ID
params: 视频参数
subclip_path_videos: 视频片段路径
"""
global merged_audio_path, merged_subtitle_path
logger.info(f"\n\n## 开始任务: {task_id}")
# 初始化 ImageMagick
if not utils.init_imagemagick():
logger.warning("ImageMagick 初始化失败,字幕可能无法正常显示")
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=5)
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=0)
# tts 角色名称
voice_name = voice.parse_voice_name(params.voice_name)
# # 初始化 ImageMagick
# if not utils.init_imagemagick():
# logger.warning("ImageMagick 初始化失败,字幕可能无法正常显示")
# # tts 角色名称
# voice_name = voice.parse_voice_name(params.voice_name)
"""
1. 加载剪辑脚本
"""
logger.info("\n\n## 1. 加载视频脚本")
video_script_path = path.join(params.video_clip_json_path)
@ -185,174 +195,145 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di
logger.debug(f"解说完整脚本: \n{video_script}")
logger.debug(f"解说 OST 列表: \n{video_ost}")
logger.debug(f"解说时间戳列表: \n{time_list}")
# 获取视频总时长(单位 s)
last_timestamp = list_script[-1]['new_timestamp']
end_time = last_timestamp.split("-")[1]
total_duration = utils.time_to_seconds(end_time)
except Exception as e:
logger.error(f"无法读取视频json脚本请检查配置是否正确。{e}")
raise ValueError("无法读取视频json脚本请检查配置是否正确")
logger.error(f"无法读取视频json脚本请检查脚本格式是否正确")
raise ValueError("无法读取视频json脚本请检查脚本格式是否正确")
else:
logger.error(f"video_script_path: {video_script_path} \n\n", traceback.format_exc())
raise ValueError("解说脚本不存在!请检查配置是否正确。")
"""
2. 使用 TTS 生成音频素材
"""
logger.info("\n\n## 2. 根据OST设置生成音频列表")
# 只为OST=0或2的片段生成TTS音频
# 只为OST=0 or 2的判断生成音频 OST=0 仅保留解说 OST=2 保留解说和原声
tts_segments = [
segment for segment in list_script
if segment['OST'] in [0, 2]
]
logger.debug(f"需要生成TTS的片段数: {len(tts_segments)}")
# 初始化音频文件路径
audio_files = []
final_audio = ""
tts_results = voice.tts_multiple(
task_id=task_id,
list_script=tts_segments, # 只传入需要TTS的片段
voice_name=params.voice_name,
voice_rate=params.voice_rate,
voice_pitch=params.voice_pitch,
)
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=20)
# """
# 3. (可选) 使用 whisper 生成字幕
# """
# if merged_subtitle_path is None:
# if audio_files:
# merged_subtitle_path = path.join(utils.task_dir(task_id), f"subtitle.srt")
# subtitle_provider = config.app.get("subtitle_provider", "").strip().lower()
# logger.info(f"\n\n使用 {subtitle_provider} 生成字幕")
#
# subtitle.create(
# audio_file=merged_audio_path,
# subtitle_file=merged_subtitle_path,
# )
# subtitle_lines = subtitle.file_to_subtitles(merged_subtitle_path)
# if not subtitle_lines:
# logger.warning(f"字幕文件无效: {merged_subtitle_path}")
#
# sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=40)
"""
3. 裁剪视频 - 将超出音频长度的视频进行裁剪
"""
logger.info("\n\n## 3. 裁剪视频")
video_clip_result = clip_video.clip_video(params.video_origin_path, tts_results)
# 更新 list_script 中的时间戳
tts_clip_result = {tts_result['_id']: tts_result['audio_file'] for tts_result in tts_results}
subclip_clip_result = {
tts_result['_id']: tts_result['subtitle_file'] for tts_result in tts_results
}
new_script_list = update_script.update_script_timestamps(list_script, video_clip_result, tts_clip_result, subclip_clip_result)
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=60)
"""
4. 合并音频和字幕
"""
logger.info("\n\n## 4. 合并音频和字幕")
total_duration = sum([script["duration"] for script in new_script_list])
if tts_segments:
audio_files, sub_maker_list = voice.tts_multiple(
task_id=task_id,
list_script=tts_segments, # 只传入需要TTS的片段
voice_name=voice_name,
voice_rate=params.voice_rate,
voice_pitch=params.voice_pitch,
force_regenerate=True
)
if audio_files is None:
sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
logger.error("TTS转换音频失败, 可能是网络不可用! 如果您在中国, 请使用VPN.")
return
if audio_files:
logger.info(f"合并音频文件: {audio_files}")
try:
# 传入OST信息以便正确处理音频
final_audio = audio_merger.merge_audio_files(
task_id=task_id,
audio_files=audio_files,
total_duration=total_duration,
list_script=list_script # 传入完整脚本以便处理OST
)
logger.info("音频文件合并成功")
except Exception as e:
logger.error(f"合并音频文件失败: {str(e)}")
final_audio = ""
else:
# 如果没有需要生成TTS的片段创建一个空白音频文件
# 这样可以确保后续的音频处理能正确进行
logger.info("没有需要生成TTS的片段将保留原声和背景音乐")
final_audio = path.join(utils.task_dir(task_id), "empty.mp3")
try:
from moviepy.editor import AudioClip
# 创建一个与视频等长的空白音频
empty_audio = AudioClip(make_frame=lambda t: 0, duration=total_duration)
empty_audio.write_audiofile(final_audio, fps=44100)
logger.info(f"已创建空白音频文件: {final_audio}")
except Exception as e:
logger.error(f"创建空白音频文件失败: {str(e)}")
final_audio = ""
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=30)
subtitle_path = ""
if params.subtitle_enabled:
if audio_files:
subtitle_path = path.join(utils.task_dir(task_id), f"subtitle.srt")
subtitle_provider = config.app.get("subtitle_provider", "").strip().lower()
logger.info(f"\n\n## 3. 生成字幕、提供程序是: {subtitle_provider}")
subtitle.create(
audio_file=final_audio,
subtitle_file=subtitle_path,
# 合并音频文件
merged_audio_path = audio_merger.merge_audio_files(
task_id=task_id,
total_duration=total_duration,
list_script=new_script_list
)
logger.info(f"音频文件合并成功->{merged_audio_path}")
# 合并字幕文件
merged_subtitle_path = subtitle_merger.merge_subtitle_files(new_script_list)
logger.info(f"字幕文件合并成功->{merged_subtitle_path}")
except Exception as e:
logger.error(f"合并音频文件失败: {str(e)}")
else:
logger.warning("没有需要合并的音频/字幕")
merged_audio_path = ""
merged_subtitle_path = ""
subtitle_lines = subtitle.file_to_subtitles(subtitle_path)
if not subtitle_lines:
logger.warning(f"字幕文件无效: {subtitle_path}")
subtitle_path = ""
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=40)
logger.info("\n\n## 4. 裁剪视频")
subclip_videos = [x for x in subclip_path_videos.values()]
# logger.debug(f"\n\n## 裁剪后的视频文件列表: \n{subclip_videos}")
if not subclip_videos:
sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
logger.error(
"裁剪视频失败,可能是 ImageMagick 不可用")
return
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=50)
"""
5. 合并视频
"""
final_video_paths = []
combined_video_paths = []
_progress = 50
index = 1
combined_video_path = path.join(utils.task_dir(task_id), f"combined.mp4")
combined_video_path = path.join(utils.task_dir(task_id), f"merger.mp4")
logger.info(f"\n\n## 5. 合并视频: => {combined_video_path}")
# 如果 new_script_list 中没有 video则使用 subclip_path_videos 中的视频
video_clips = [new_script['video'] if new_script.get('video') else subclip_path_videos.get(new_script.get('_id', '')) for new_script in new_script_list]
video.combine_clip_videos(
combined_video_path=combined_video_path,
video_paths=subclip_videos,
merger_video.combine_clip_videos(
output_video_path=combined_video_path,
video_paths=video_clips,
video_ost_list=video_ost,
list_script=list_script,
video_aspect=params.video_aspect,
threads=params.n_threads # 多线程
threads=params.n_threads
)
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=80)
_progress += 50 / 2
sm.state.update_task(task_id, progress=_progress)
"""
6. 合并字幕/BGM/配音/视频
"""
output_video_path = path.join(utils.task_dir(task_id), f"combined.mp4")
logger.info(f"\n\n## 6. 最后一步: 合并字幕/BGM/配音/视频 -> {output_video_path}")
final_video_path = path.join(utils.task_dir(task_id), f"final-{index}.mp4")
# bgm_path = '/Users/apple/Desktop/home/NarratoAI/resource/songs/bgm.mp3'
bgm_path = utils.get_bgm_file()
logger.info(f"\n\n## 6. 最后合成: {index} => {final_video_path}")
# 获取背景音乐
bgm_path = None
if params.bgm_type or params.bgm_file:
try:
bgm_path = utils.get_bgm_file(bgm_type=params.bgm_type, bgm_file=params.bgm_file)
if bgm_path:
logger.info(f"使用背景音乐: {bgm_path}")
except Exception as e:
logger.error(f"获取背景音乐失败: {str(e)}")
# 示例:自定义字幕样式
subtitle_style = {
'fontsize': params.font_size, # 字体大小
'color': params.text_fore_color, # 字体颜色
'stroke_color': params.stroke_color, # 描边颜色
'stroke_width': params.stroke_width, # 描边宽度, 范围0-10
'bg_color': params.text_back_color, # 半透明黑色背景
'position': (params.subtitle_position, 0.2), # 距离顶部60%的位置
'method': 'caption' # 渲染方法
# 调用示例
options = {
'voice_volume': params.tts_volume, # 配音音量
'bgm_volume': params.bgm_volume, # 背景音乐音量
'original_audio_volume': params.original_volume, # 视频原声音量0表示不保留
'keep_original_audio': True, # 是否保留原声
'subtitle_enabled': params.subtitle_enabled, # 是否启用字幕 - 修复字幕开关bug
'subtitle_font': params.font_name, # 这里使用相对字体路径,会自动在 font_dir() 目录下查找
'subtitle_font_size': params.font_size,
'subtitle_color': params.text_fore_color,
'subtitle_bg_color': None, # 直接使用None表示透明背景
'subtitle_position': params.subtitle_position,
'custom_position': params.custom_position,
'threads': params.n_threads
}
# 示例:自定义音量配置
volume_config = {
'original': params.original_volume, # 原声音量80%
'bgm': params.bgm_volume, # BGM音量20%
'narration': params.tts_volume or params.voice_volume, # 解说音量100%
}
font_path = utils.font_dir(params.font_name)
video.generate_video_v3(
generate_video.merge_materials(
video_path=combined_video_path,
subtitle_path=subtitle_path,
audio_path=merged_audio_path,
subtitle_path=merged_subtitle_path,
bgm_path=bgm_path,
narration_path=final_audio,
output_path=final_video_path,
volume_config=volume_config, # 添加音量配置
subtitle_style=subtitle_style,
font_path=font_path
output_path=output_video_path,
options=options
)
_progress += 50 / 2
sm.state.update_task(task_id, progress=_progress)
final_video_paths.append(final_video_path)
final_video_paths.append(output_video_path)
combined_video_paths.append(combined_video_path)
logger.success(f"任务 {task_id} 已完成, 生成 {len(final_video_paths)} 个视频.")
@ -400,35 +381,19 @@ def validate_params(video_path, audio_path, output_file, params):
if __name__ == "__main__":
# task_id = "test123"
# subclip_path_videos = {'00:41-01:58': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-00_41-01_58.mp4',
# '00:06-00:15': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-00_06-00_15.mp4',
# '01:10-01:17': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-01_10-01_17.mp4',
# '00:47-01:03': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-00_47-01_03.mp4',
# '01:03-01:10': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-01_03-01_10.mp4',
# '02:40-03:08': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-02_40-03_08.mp4',
# '03:02-03:20': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-03_02-03_20.mp4',
# '03:18-03:20': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-03_18-03_20.mp4'}
#
# params = VideoClipParams(
# video_clip_json_path="E:\\projects\\NarratoAI\\resource/scripts/test003.json",
# video_origin_path="E:\\projects\\NarratoAI\\resource/videos/1.mp4",
# )
# start_subclip(task_id, params, subclip_path_videos=subclip_path_videos)
task_id = "demo"
task_id = "test456"
subclip_path_videos = {'01:10-01:17': './storage/cache_videos/vid-01_10-01_17.mp4',
'01:58-02:04': './storage/cache_videos/vid-01_58-02_04.mp4',
'02:25-02:31': './storage/cache_videos/vid-02_25-02_31.mp4',
'01:28-01:33': './storage/cache_videos/vid-01_28-01_33.mp4',
'03:14-03:18': './storage/cache_videos/vid-03_14-03_18.mp4',
'00:24-00:28': './storage/cache_videos/vid-00_24-00_28.mp4',
'03:02-03:08': './storage/cache_videos/vid-03_02-03_08.mp4',
'00:41-00:44': './storage/cache_videos/vid-00_41-00_44.mp4',
'02:12-02:25': './storage/cache_videos/vid-02_12-02_25.mp4'}
# 提前裁剪是为了方便检查视频
subclip_path_videos = {
1: '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/113343d127b5a09d0bf84b68bd1b3b97/vid_00-00-05-390@00-00-57-980.mp4',
2: '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/113343d127b5a09d0bf84b68bd1b3b97/vid_00-00-28-900@00-00-43-700.mp4',
3: '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/113343d127b5a09d0bf84b68bd1b3b97/vid_00-01-17-840@00-01-27-600.mp4',
4: '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/113343d127b5a09d0bf84b68bd1b3b97/vid_00-02-35-460@00-02-52-380.mp4',
5: '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/113343d127b5a09d0bf84b68bd1b3b97/vid_00-06-59-520@00-07-29-500.mp4',
}
params = VideoClipParams(
video_clip_json_path="/Users/apple/Desktop/home/NarratoAI/resource/scripts/test004.json",
video_origin_path="/Users/apple/Desktop/home/NarratoAI/resource/videos/1.mp4",
video_clip_json_path="/Users/apple/Desktop/home/NarratoAI/resource/scripts/2025-0507-223311.json",
video_origin_path="/Users/apple/Desktop/home/NarratoAI/resource/videos/merged_video_4938.mp4",
)
start_subclip(task_id, params, subclip_path_videos=subclip_path_videos)
start_subclip(task_id, params, subclip_path_videos)

View File

@ -0,0 +1,266 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
'''
@Project: NarratoAI
@File : update_script
@Author : 小林同学
@Date : 2025/5/6 下午11:00
'''
import re
import os
from typing import Dict, List, Any, Tuple, Union
def extract_timestamp_from_video_path(video_path: str) -> str:
    """Pull the clip time range out of a cached-video filename.

    Two naming schemes are recognised:
      * new: ``vid_HH-MM-SS-mmm@HH-MM-SS-mmm.mp4`` -> ``HH:MM:SS,mmm-HH:MM:SS,mmm``
      * old: ``vid-HH-MM-SS-HH-MM-SS.mp4``         -> ``HH:MM:SS-HH:MM:SS``

    Args:
        video_path: path (or bare filename) of the clip.

    Returns:
        The extracted timestamp range, or an empty string when the filename
        matches neither scheme.
    """
    name = os.path.basename(video_path)

    # New scheme carries millisecond precision, separated by '@'.
    new_style = re.search(
        r'vid_(\d{2})-(\d{2})-(\d{2})-(\d{3})@(\d{2})-(\d{2})-(\d{2})-(\d{3})\.mp4',
        name,
    )
    if new_style:
        g = new_style.groups()
        start = f"{g[0]}:{g[1]}:{g[2]},{g[3]}"
        end = f"{g[4]}:{g[5]}:{g[6]},{g[7]}"
        return f"{start}-{end}"

    # Old scheme: whole-second timestamps joined by '-'.
    old_style = re.search(r'vid-(\d{2}-\d{2}-\d{2})-(\d{2}-\d{2}-\d{2})\.mp4', name)
    if old_style:
        return "{}-{}".format(
            old_style.group(1).replace('-', ':'),
            old_style.group(2).replace('-', ':'),
        )

    return ""
def calculate_duration(timestamp: str) -> float:
    """Return the length in seconds of a ``start-end`` timestamp range.

    Either side may be ``HH:MM:SS`` or ``HH:MM:SS,mmm``. Malformed input
    yields 0.0 instead of raising, preserving the best-effort contract.

    Args:
        timestamp: range such as ``00:00:05,390-00:00:57,980``.

    Returns:
        Duration in seconds, rounded to two decimals; 0.0 on parse failure.
    """
    def _to_seconds(stamp: str) -> float:
        # The millisecond part (after the comma) is optional.
        if ',' in stamp:
            clock, _, frac = stamp.partition(',')
            millis = float('0.' + frac) if frac else 0
        else:
            clock, millis = stamp, 0
        h, m, s = map(int, clock.split(':'))
        return h * 3600 + m * 60 + s + millis

    try:
        begin, finish = timestamp.split('-')
        return round(_to_seconds(finish) - _to_seconds(begin), 2)
    except (ValueError, AttributeError):
        # Wrong number of '-' separators, non-numeric fields, or a non-string
        # input all fall through to the neutral duration.
        return 0.0
def update_script_timestamps(
    script_list: List[Dict[str, Any]],
    video_result: Dict[Union[str, int], str],
    audio_result: Dict[Union[str, int], str] = None,
    subtitle_result: Dict[Union[str, int], str] = None,
    calculate_edited_timerange: bool = True
) -> List[Dict[str, Any]]:
    """Enrich each script entry with media paths and recomputed time ranges.

    For every entry the matching video clip's filename timestamp becomes
    ``sourceTimeRange`` (falling back to the entry's original ``timestamp``),
    ``duration`` is derived from it, and — when requested — a running
    ``editedTimeRange`` locates the segment inside the assembled output video.
    Audio/subtitle/video file paths are attached from the result dicts.

    Args:
        script_list: original script entries (not mutated; copies returned).
        video_result: key (``_id`` or original timestamp) -> clip file path.
        audio_result: key -> narration audio path (optional).
        subtitle_result: key -> subtitle file path (optional).
        calculate_edited_timerange: also compute cumulative output-video ranges.

    Returns:
        A new list of updated entry dicts.
    """
    def _fmt(seconds: float) -> str:
        # Seconds -> zero-padded HH:MM:SS (fractions truncated).
        whole = int(seconds)
        return f"{whole // 3600:02d}:{(whole % 3600) // 60:02d}:{whole % 60:02d}"

    def _lookup(table, entry_id, stamp):
        # Prefer the entry's _id key, then fall back to its original timestamp.
        if table:
            if entry_id and entry_id in table:
                return table[entry_id]
            if stamp in table:
                return table[stamp]
        return None

    # Map each video_result key to the time range embedded in its filename.
    clip_ranges = {}
    for key, clip_path in video_result.items():
        stamped = extract_timestamp_from_video_path(clip_path)
        if stamped:
            clip_ranges[key] = {
                'new_timestamp': stamped,
                'video_path': clip_path
            }

    assembled = []
    elapsed = 0.0  # running total used for editedTimeRange

    for entry in script_list:
        record = entry.copy()
        entry_id = record.get('_id')
        source_stamp = record.get('timestamp', '')

        # Attach media paths; empty string when nothing matched.
        record['audio'] = _lookup(audio_result, entry_id, source_stamp) or ""
        record['subtitle'] = _lookup(subtitle_result, entry_id, source_stamp) or ""
        record['video'] = _lookup(video_result, entry_id, source_stamp) or ""

        # Resolve the source time range and its duration.
        span = 0.0
        mapped = _lookup(clip_ranges, entry_id, source_stamp)
        if mapped:
            record['sourceTimeRange'] = mapped['new_timestamp']
            span = calculate_duration(mapped['new_timestamp'])
            record['duration'] = span
        elif source_stamp:
            # No clip match: keep the original timestamp but still report span.
            record['sourceTimeRange'] = source_stamp
            span = calculate_duration(source_stamp)
            record['duration'] = span

        # Place the segment on the output-video timeline.
        if calculate_edited_timerange and span > 0:
            begin, finish = elapsed, elapsed + span
            record['editedTimeRange'] = f"{_fmt(begin)}-{_fmt(finish)}"
            elapsed = finish

        assembled.append(record)

    return assembled
if __name__ == '__main__':
    # Demo fixture: a six-entry narration script mixing commentary segments
    # (OST=0, TTS narration) and original-sound segments (OST=1), used to
    # exercise update_script_timestamps() below.
    list_script = [
        {
            'picture': '【解说】好的,各位,欢迎回到我的频道!《庆余年 2》刚开播就给了我们一个王炸范闲在北齐""了?这怎么可能!',
            'timestamp': '00:00:00,001-00:01:15,001',
            'narration': '好的各位,欢迎回到我的频道!《庆余年 2》刚开播就给了我们一个王炸范闲在北齐""了?这怎么可能!上集片尾那个巨大的悬念,这一集就立刻揭晓了!范闲假死归来,他面临的第一个,也是最大的难关,就是如何面对他最敬爱的,同时也是最可怕的那个人——庆帝!',
            'OST': 0,
            '_id': 1
        },
        {
            'picture': '【解说】上一集我们看到,范闲在北齐遭遇了惊天变故,生死不明!',
            'timestamp': '00:01:15,001-00:04:40,001',
            'narration': '但我们都知道,他绝不可能就这么轻易退场!第二集一开场,范闲就已经秘密回到了京都。他的生死传闻,可不像我们想象中那样只是小范围流传,而是…',
            'OST': 0,
            '_id': 2
        },
        {
            'picture': '画面切到王启年小心翼翼地向范闲汇报。',
            'timestamp': '00:04:41,001-00:04:58,001',
            'narration': '我发现大人的死讯不光是在民间,在官场上也它传开了,所以呢,所以啊,可不是什么好事,将来您跟陛下怎么交代,这可是欺君之罪',
            'OST': 1,
            '_id': 3
        },
        {
            'picture': '【解说】"欺君之罪"!在封建王朝,这可是抄家灭族的大罪!搁一般人,肯定脚底抹油溜之大吉了。',
            'timestamp': '00:04:58,001-00:05:45,001',
            'narration': '"欺君之罪"!在封建王朝,这可是抄家灭族的大罪!搁一般人,肯定脚底抹油溜之大吉了。但范闲是谁啊?他偏要反其道而行之!他竟然决定,直接去见庆帝!冒着天大的风险,用"假死"这个事实去赌庆帝的态度!',
            'OST': 0,
            '_id': 4
        },
        {
            'picture': '【解说】但想见庆帝,哪有那么容易?范闲艺高人胆大,竟然选择了最激进的方式——闯宫!',
            'timestamp': '00:05:45,001-00:06:00,001',
            'narration': '但想见庆帝,哪有那么容易?范闲艺高人胆大,竟然选择了最激进的方式——闯宫!',
            'OST': 0,
            '_id': 5
        },
        {
            'picture': '画面切换到范闲蒙面闯入皇宫,被侍卫包围的场景。',
            'timestamp': '00:06:00,001-00:06:03,001',
            'narration': '抓刺客',
            'OST': 1,
            '_id': 6
        }]

    # Keyed by script _id; entries 3 and 6 (OST=1) intentionally have no
    # generated clip/audio/subtitle, so they fall back to their original
    # timestamps.
    video_res = {
        1: '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/fc3db5844d1ba7d7d838be52c0dac1bd/vid_00-00-00-000@00-00-20-250.mp4',
        2: '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/fc3db5844d1ba7d7d838be52c0dac1bd/vid_00-00-30-000@00-00-48-950.mp4',
        4: '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/fc3db5844d1ba7d7d838be52c0dac1bd/vid_00-01-00-000@00-01-15-688.mp4',
        5: '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/fc3db5844d1ba7d7d838be52c0dac1bd/vid_00-01-30-000@00-01-49-512.mp4'}
    audio_res = {
        1: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_00_00-00_01_15.mp3',
        2: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_01_15-00_04_40.mp3',
        4: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_04_58-00_05_45.mp3',
        5: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_05_45-00_06_00.mp3'}
    sub_res = {
        1: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_00_00-00_01_15.srt',
        2: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_01_15-00_04_40.srt',
        4: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_04_58-00_05_45.srt',
        5: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_05_45-00_06_00.srt'}

    # Update the script with media paths and recomputed timestamps, then print
    # one summary line per entry.
    updated_list_script = update_script_timestamps(list_script, video_res, audio_res, sub_res)
    for item in updated_list_script:
        print(
            f"ID: {item['_id']} | Picture: {item['picture'][:20]}... | Timestamp: {item['timestamp']} | " +
            f"SourceTimeRange: {item['sourceTimeRange']} | EditedTimeRange: {item.get('editedTimeRange', '')} | " +
            f"Duration: {item['duration']} 秒 | Audio: {item['audio']} | Video: {item['video']} | Subtitle: {item['subtitle']}")

View File

@ -1,13 +1,13 @@
import traceback
import pysrt
# import pysrt
from typing import Optional
from typing import List
from loguru import logger
from moviepy.editor import *
from moviepy import *
from PIL import ImageFont
from contextlib import contextmanager
from moviepy.editor import (
from moviepy import (
VideoFileClip,
AudioFileClip,
TextClip,
@ -105,86 +105,6 @@ def manage_clip(clip):
del clip
def combine_clip_videos(combined_video_path: str,
                        video_paths: List[str],
                        video_ost_list: List[int],
                        list_script: list,
                        video_aspect: VideoAspect = VideoAspect.portrait,
                        threads: int = 2,
                        ) -> str:
    """
    Merge sub-clips into a single video.

    Args:
        combined_video_path: output path of the merged video
        video_paths: paths of the sub-clips to merge
        video_ost_list: per-clip original-audio flags
            (0: drop original audio, 1: keep only original audio,
             2: keep original audio plus narration)
        list_script: editing script (used to report total audio duration)
        video_aspect: target aspect ratio of the output
        threads: number of encoding threads

    Returns:
        str: path of the merged video

    Raises:
        ValueError: when no clip could be opened/processed.
    """
    from app.utils.utils import calculate_total_duration
    audio_duration = calculate_total_duration(list_script)
    logger.info(f"音频的最大持续时间: {audio_duration} s")
    output_dir = os.path.dirname(combined_video_path)

    aspect = VideoAspect(video_aspect)
    video_width, video_height = aspect.to_resolution()

    clips = []
    for video_path, video_ost in zip(video_paths, video_ost_list):
        try:
            clip = VideoFileClip(video_path)

            if video_ost == 0:  # drop the clip's original audio
                clip = clip.without_audio()
            # video_ost 1 or 2 both keep the original audio; nothing to do.

            # Normalise the frame rate before concatenation.
            clip = clip.set_fps(30)

            # Letterbox/pad clips that don't match the target resolution.
            clip_w, clip_h = clip.size
            if clip_w != video_width or clip_h != video_height:
                clip = resize_video_with_padding(
                    clip,
                    target_width=video_width,
                    target_height=video_height
                )
                logger.info(f"视频 {video_path} 已调整尺寸为 {video_width} x {video_height}")

            clips.append(clip)
        except Exception as e:
            # Best-effort: skip unreadable clips instead of aborting the merge.
            logger.error(f"处理视频 {video_path} 时出错: {str(e)}")
            continue

    if not clips:
        raise ValueError("没有有效的视频片段可以合并")

    # Pre-initialize so the finally block cannot raise NameError when
    # concatenate_videoclips itself fails (which would mask the real error).
    video_clip = None
    try:
        video_clip = concatenate_videoclips(clips)
        video_clip = video_clip.set_fps(30)
        logger.info("开始合并视频... (过程中出现 UserWarning: 不必理会)")
        video_clip.write_videofile(
            filename=combined_video_path,
            threads=threads,
            audio_codec="aac",
            fps=30,
            temp_audiofile=os.path.join(output_dir, "temp-audio.m4a")
        )
    finally:
        # Ensure every clip handle is released even on failure.
        if video_clip is not None:
            video_clip.close()
        for clip in clips:
            clip.close()

    logger.success("视频合并完成")
    return combined_video_path
def resize_video_with_padding(clip, target_width: int, target_height: int):
"""
调整视频尺寸并添加黑边
@ -285,7 +205,8 @@ def generate_video_v3(
bgm_path: Optional[str] = None,
narration_path: Optional[str] = None,
output_path: str = "output.mp4",
font_path: Optional[str] = None
font_path: Optional[str] = None,
subtitle_enabled: bool = True
) -> None:
"""
合并视频素材包括视频字幕BGM和解说音频
@ -300,6 +221,7 @@ def generate_video_v3(
- original: 原声音量0-1默认1.0
- bgm: BGM音量0-1默认0.3
- narration: 解说音量0-1默认1.0
subtitle_enabled: 是否启用字幕默认True
subtitle_style: 字幕样式配置字典可包含以下键
- font: 字体名称
- fontsize: 字体大小
@ -319,8 +241,8 @@ def generate_video_v3(
video = VideoFileClip(video_path)
subtitle_clips = []
# 处理字幕(如果提供)
if subtitle_path:
# 处理字幕(如果启用且提供)- 修复字幕开关bug
if subtitle_enabled and subtitle_path:
if os.path.exists(subtitle_path):
# 检查字体文件
if font_path and not os.path.exists(font_path):
@ -388,30 +310,45 @@ def generate_video_v3(
except Exception as e:
logger.info(f"警告:处理字幕文件时出错: {str(e)}")
else:
logger.info(f"提示:字幕文件不存在: {subtitle_path}")
logger.warning(f"字幕文件不存在: {subtitle_path}")
elif not subtitle_enabled:
logger.info("字幕已禁用,跳过字幕处理")
elif not subtitle_path:
logger.info("未提供字幕文件路径,跳过字幕处理")
# 合并音频
audio_clips = []
# 添加原声(设置音量)
logger.debug(f"音量配置: {volume_config}")
logger.info(f"音量配置详情: {volume_config}")
if video.audio is not None:
original_audio = video.audio.volumex(volume_config['original'])
original_volume = volume_config['original']
logger.info(f"应用原声音量: {original_volume}")
original_audio = video.audio.volumex(original_volume)
audio_clips.append(original_audio)
logger.info("原声音频已添加到合成列表")
else:
logger.warning("视频没有音轨,无法添加原声")
# 添加BGM如果提供
if bgm_path:
logger.info(f"添加背景音乐: {bgm_path}")
bgm = AudioFileClip(bgm_path)
if bgm.duration < video.duration:
bgm = loop_audio_clip(bgm, video.duration)
else:
bgm = bgm.subclip(0, video.duration)
bgm = bgm.volumex(volume_config['bgm'])
bgm_volume = volume_config['bgm']
logger.info(f"应用BGM音量: {bgm_volume}")
bgm = bgm.volumex(bgm_volume)
audio_clips.append(bgm)
# 添加解说音频(如果提供)
if narration_path:
narration = AudioFileClip(narration_path).volumex(volume_config['narration'])
logger.info(f"添加解说音频: {narration_path}")
narration_volume = volume_config['narration']
logger.info(f"应用解说音量: {narration_volume}")
narration = AudioFileClip(narration_path).volumex(narration_volume)
audio_clips.append(narration)
# 合成最终视频(包含字幕)
@ -422,18 +359,53 @@ def generate_video_v3(
final_video = video
if audio_clips:
logger.info(f"合成音频轨道,共 {len(audio_clips)} 个音频片段")
final_audio = CompositeAudioClip(audio_clips)
final_video = final_video.set_audio(final_audio)
logger.info("音频合成完成")
else:
logger.warning("没有音频轨道需要合成")
# 导出视频
logger.info("开始导出视频...") # 调试信息
final_video.write_videofile(
output_path,
codec='libx264',
audio_codec='aac',
fps=video.fps
)
logger.info(f"视频已导出到: {output_path}") # 调试信息
# 导出视频 - 使用优化的编码器
logger.info("开始导出视频...")
# 获取最优编码器
from app.utils import ffmpeg_utils
optimal_encoder = ffmpeg_utils.get_optimal_ffmpeg_encoder()
# 根据编码器类型设置参数
ffmpeg_params = []
if "nvenc" in optimal_encoder:
ffmpeg_params = ['-preset', 'medium', '-profile:v', 'high']
elif "videotoolbox" in optimal_encoder:
ffmpeg_params = ['-profile:v', 'high']
elif "qsv" in optimal_encoder:
ffmpeg_params = ['-preset', 'medium']
elif "vaapi" in optimal_encoder:
ffmpeg_params = ['-profile', '100']
elif optimal_encoder == "libx264":
ffmpeg_params = ['-preset', 'medium', '-crf', '23']
try:
final_video.write_videofile(
output_path,
codec=optimal_encoder,
audio_codec='aac',
fps=video.fps,
ffmpeg_params=ffmpeg_params
)
logger.info(f"视频已导出到: {output_path} (使用编码器: {optimal_encoder})")
except Exception as e:
logger.warning(f"使用 {optimal_encoder} 编码器失败: {str(e)}, 尝试软件编码")
# 降级到软件编码
final_video.write_videofile(
output_path,
codec='libx264',
audio_codec='aac',
fps=video.fps,
ffmpeg_params=['-preset', 'medium', '-crf', '23']
)
logger.info(f"视频已导出到: {output_path} (使用软件编码)")
# 清理资源
video.close()
@ -443,4 +415,3 @@ def generate_video_v3(
bgm.close()
if narration_path:
narration.close()

View File

@ -4,8 +4,6 @@ from loguru import logger
from typing import Dict, List, Optional, Tuple
from app.services import material
from app.models.schema import VideoClipParams
from app.utils import utils
class VideoService:

View File

@ -5,10 +5,11 @@ import traceback
import edge_tts
import asyncio
from loguru import logger
from typing import List
from typing import List, Union
from datetime import datetime
from xml.sax.saxutils import unescape
from edge_tts import submaker, SubMaker
from edge_tts.submaker import mktimestamp
from moviepy.video.tools import subtitles
import time
@ -1036,7 +1037,7 @@ def is_azure_v2_voice(voice_name: str):
def tts(
text: str, voice_name: str, voice_rate: float, voice_pitch: float, voice_file: str
) -> [SubMaker, None]:
) -> Union[SubMaker, None]:
if is_azure_v2_voice(voice_name):
return azure_tts_v2(text, voice_name, voice_file)
return azure_tts_v1(text, voice_name, voice_rate, voice_pitch, voice_file)
@ -1064,7 +1065,7 @@ def convert_pitch_to_percent(rate: float) -> str:
def azure_tts_v1(
text: str, voice_name: str, voice_rate: float, voice_pitch: float, voice_file: str
) -> [SubMaker, None]:
) -> Union[SubMaker, None]:
voice_name = parse_voice_name(voice_name)
text = text.strip()
rate_str = convert_rate_to_percent(voice_rate)
@ -1087,11 +1088,6 @@ def azure_tts_v1(
)
return sub_maker, audio_data
# 判断音频文件是否已存在
if os.path.exists(voice_file):
logger.info(f"voice file exists, skip tts: {voice_file}")
continue
# 获取音频数据和字幕信息
sub_maker, audio_data = asyncio.run(_do())
@ -1105,8 +1101,6 @@ def azure_tts_v1(
# 数据有效,写入文件
with open(voice_file, "wb") as file:
file.write(audio_data)
logger.info(f"completed, output file: {voice_file}")
return sub_maker
except Exception as e:
logger.error(f"生成音频文件时出错: {str(e)}")
@ -1115,7 +1109,7 @@ def azure_tts_v1(
return None
def azure_tts_v2(text: str, voice_name: str, voice_file: str) -> [SubMaker, None]:
def azure_tts_v2(text: str, voice_name: str, voice_file: str) -> Union[SubMaker, None]:
voice_name = is_azure_v2_voice(voice_name)
if not voice_name:
logger.error(f"invalid voice name: {voice_name}")
@ -1203,11 +1197,14 @@ def azure_tts_v2(text: str, voice_name: str, voice_file: str) -> [SubMaker, None
def _format_text(text: str) -> str:
# text = text.replace("\n", " ")
text = text.replace("\n", " ")
text = text.replace("\"", " ")
text = text.replace("[", " ")
text = text.replace("]", " ")
text = text.replace("(", " ")
text = text.replace(")", " ")
text = text.replace("", " ")
text = text.replace("", " ")
text = text.replace("{", " ")
text = text.replace("}", " ")
text = text.strip()
@ -1240,7 +1237,7 @@ def create_subtitle_from_multiple(text: str, sub_maker_list: List[SubMaker], lis
if script_item['OST']:
continue
start_time, end_time = script_item['new_timestamp'].split('-')
start_time, end_time = script_item['timestamp'].split('-')
if sub_maker_index >= len(sub_maker_list):
logger.error(f"Sub maker list index out of range: {sub_maker_index}")
break
@ -1317,6 +1314,99 @@ def create_subtitle_from_multiple(text: str, sub_maker_list: List[SubMaker], lis
traceback.print_exc()
def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str):
"""
优化字幕文件
1. 将字幕文件按照标点符号分割成多行
2. 逐行匹配字幕文件中的文本
3. 生成新的字幕文件
"""
text = _format_text(text)
def formatter(idx: int, start_time: float, end_time: float, sub_text: str) -> str:
"""
1
00:00:00,000 --> 00:00:02,360
跑步是一项简单易行的运动
"""
start_t = mktimestamp(start_time).replace(".", ",")
end_t = mktimestamp(end_time).replace(".", ",")
return f"{idx}\n" f"{start_t} --> {end_t}\n" f"{sub_text}\n"
start_time = -1.0
sub_items = []
sub_index = 0
script_lines = utils.split_string_by_punctuations(text)
def match_line(_sub_line: str, _sub_index: int):
if len(script_lines) <= _sub_index:
return ""
_line = script_lines[_sub_index]
if _sub_line == _line:
return script_lines[_sub_index].strip()
_sub_line_ = re.sub(r"[^\w\s]", "", _sub_line)
_line_ = re.sub(r"[^\w\s]", "", _line)
if _sub_line_ == _line_:
return _line_.strip()
_sub_line_ = re.sub(r"\W+", "", _sub_line)
_line_ = re.sub(r"\W+", "", _line)
if _sub_line_ == _line_:
return _line.strip()
return ""
sub_line = ""
try:
for _, (offset, sub) in enumerate(zip(sub_maker.offset, sub_maker.subs)):
_start_time, end_time = offset
if start_time < 0:
start_time = _start_time
sub = unescape(sub)
sub_line += sub
sub_text = match_line(sub_line, sub_index)
if sub_text:
sub_index += 1
line = formatter(
idx=sub_index,
start_time=start_time,
end_time=end_time,
sub_text=sub_text,
)
sub_items.append(line)
start_time = -1.0
sub_line = ""
if len(sub_items) == len(script_lines):
with open(subtitle_file, "w", encoding="utf-8") as file:
file.write("\n".join(sub_items) + "\n")
try:
sbs = subtitles.file_to_subtitles(subtitle_file, encoding="utf-8")
duration = max([tb for ((ta, tb), txt) in sbs])
logger.info(
f"已创建字幕文件: {subtitle_file}, duration: {duration}"
)
return subtitle_file, duration
except Exception as e:
logger.error(f"failed, error: {str(e)}")
os.remove(subtitle_file)
else:
logger.error(
f"字幕创建失败, 字幕长度: {len(sub_items)}, script_lines len: {len(script_lines)}"
f"\nsub_items:{json.dumps(sub_items, indent=4, ensure_ascii=False)}"
f"\nscript_lines:{json.dumps(script_lines, indent=4, ensure_ascii=False)}"
)
except Exception as e:
logger.error(f"failed, error: {str(e)}")
def get_audio_duration(sub_maker: submaker.SubMaker):
"""
获取音频时长
@ -1326,7 +1416,7 @@ def get_audio_duration(sub_maker: submaker.SubMaker):
return sub_maker.offset[-1][1] / 10000000
def tts_multiple(task_id: str, list_script: list, voice_name: str, voice_rate: float, voice_pitch: float, force_regenerate: bool = True):
def tts_multiple(task_id: str, list_script: list, voice_name: str, voice_rate: float, voice_pitch: float):
"""
根据JSON文件中的多段文本进行TTS转换
@ -1334,25 +1424,18 @@ def tts_multiple(task_id: str, list_script: list, voice_name: str, voice_rate: f
:param list_script: 脚本列表
:param voice_name: 语音名称
:param voice_rate: 语音速率
:param force_regenerate: 是否强制重新生成已存在的音频文件
:return: 生成的音频文件列表
"""
voice_name = parse_voice_name(voice_name)
output_dir = utils.task_dir(task_id)
audio_files = []
sub_maker_list = []
tts_results = []
for item in list_script:
if item['OST'] != 1:
# 将时间戳中的冒号替换为下划线
timestamp = item['new_timestamp'].replace(':', '_')
timestamp = item['timestamp'].replace(':', '_')
audio_file = os.path.join(output_dir, f"audio_{timestamp}.mp3")
# 检查文件是否已存在,如存在且不强制重新生成,则跳过
if os.path.exists(audio_file) and not force_regenerate:
logger.info(f"音频文件已存在,跳过生成: {audio_file}")
audio_files.append(audio_file)
continue
subtitle_file = os.path.join(output_dir, f"subtitle_{timestamp}.srt")
text = item['narration']
@ -1369,9 +1452,18 @@ def tts_multiple(task_id: str, list_script: list, voice_name: str, voice_rate: f
f"如果您在中国请使用VPN; "
f"或者使用其他 tts 引擎")
continue
else:
# 为当前片段生成字幕文件
_, duration = create_subtitle(sub_maker=sub_maker, text=text, subtitle_file=subtitle_file)
audio_files.append(audio_file)
sub_maker_list.append(sub_maker)
tts_results.append({
"_id": item['_id'],
"timestamp": item['timestamp'],
"audio_file": audio_file,
"subtitle_file": subtitle_file,
"duration": duration,
"text": text,
})
logger.info(f"已生成音频文件: {audio_file}")
return audio_files, sub_maker_list
return tts_results

1017
app/utils/ffmpeg_utils.py Normal file

File diff suppressed because it is too large Load Diff

View File

@ -61,7 +61,6 @@ class VisionAnalyzer:
try:
# 加载图片
if isinstance(images[0], str):
logger.info("正在加载图片...")
images = self.load_images(images)
# 验证图片列表
@ -81,11 +80,14 @@ class VisionAnalyzer:
images = valid_images
results = []
total_batches = (len(images) + batch_size - 1) // batch_size
# 视频帧总数除以批量处理大小,如果有小数则+1
batches_needed = len(images) // batch_size
if len(images) % batch_size > 0:
batches_needed += 1
logger.debug(f"视频帧总数:{len(images)}, 每批处理 {batch_size} 帧, 需要访问 VLM {batches_needed}")
logger.debug(f"{total_batches} 个批次,每批次 {batch_size} 张图片")
with tqdm(total=total_batches, desc="分析进度") as pbar:
with tqdm(total=batches_needed, desc="分析进度") as pbar:
for i in range(0, len(images), batch_size):
batch = images[i:i + batch_size]
retry_count = 0
@ -93,8 +95,8 @@ class VisionAnalyzer:
while retry_count < 3:
try:
# 在每个批次处理前添加小延迟
if i > 0:
await asyncio.sleep(2)
# if i > 0:
# await asyncio.sleep(2)
# 确保每个批次的图片都是有效的
valid_batch = [img for img in batch if isinstance(img, PIL.Image.Image)]

View File

@ -30,7 +30,7 @@ class QwenAnalyzer:
self.model_name = model_name
self.api_key = api_key
self.base_url = base_url or "https://dashscope.aliyuncs.com/compatible-mode/v1"
self.base_url = base_url
# 配置API客户端
self._configure_client()
@ -80,7 +80,7 @@ class QwenAnalyzer:
# 添加文本提示
content.append({
"type": "text",
"text": prompt
"text": prompt % (len(content), len(content), len(content))
})
# 调用API
@ -102,7 +102,7 @@ class QwenAnalyzer:
async def analyze_images(self,
images: Union[List[str], List[PIL.Image.Image]],
prompt: str,
batch_size: int = 5) -> List[Dict]:
batch_size: int) -> List[Dict]:
"""
批量分析多张图片
Args:
@ -118,7 +118,6 @@ class QwenAnalyzer:
# 加载图片
if isinstance(images[0], str):
logger.info("正在加载图片...")
images = self.load_images(images)
# 验证图片列表
@ -141,9 +140,14 @@ class QwenAnalyzer:
images = valid_images
results = []
total_batches = (len(images) + batch_size - 1) // batch_size
# 视频帧总数除以批量处理大小,如果有小数则+1
batches_needed = len(images) // batch_size
if len(images) % batch_size > 0:
batches_needed += 1
logger.debug(f"视频帧总数:{len(images)}, 每批处理 {batch_size} 帧, 需要访问 VLM {batches_needed}")
with tqdm(total=total_batches, desc="分析进度") as pbar:
with tqdm(total=batches_needed, desc="分析进度") as pbar:
for i in range(0, len(images), batch_size):
batch = images[i:i + batch_size]
batch_paths = valid_paths[i:i + batch_size] if valid_paths else None
@ -151,9 +155,9 @@ class QwenAnalyzer:
while retry_count < 3:
try:
# 在每个批次处理前添加小延迟
if i > 0:
await asyncio.sleep(2)
# 在每个批次处理前加小延迟
# if i > 0:
# await asyncio.sleep(0.5)
# 确保每个批次的图片都是有效的
valid_batch = [img for img in batch if isinstance(img, PIL.Image.Image)]
@ -209,7 +213,7 @@ class QwenAnalyzer:
for i, result in enumerate(results):
response_text = result['response']
# 如果有图片路径信息,使用它来生成文件名
# 如果有图片路径信息,用它来生成文件名
if result.get('image_paths'):
image_paths = result['image_paths']
img_name_start = Path(image_paths[0]).stem.split('_')[-1]

View File

@ -2,7 +2,7 @@ import os
import json
import traceback
from loguru import logger
import tiktoken
# import tiktoken
from typing import List, Dict
from datetime import datetime
from openai import OpenAI
@ -94,12 +94,12 @@ class OpenAIGenerator(BaseGenerator):
"user": "script_generator"
}
# 初始化token计数器
try:
self.encoding = tiktoken.encoding_for_model(self.model_name)
except KeyError:
logger.warning(f"未找到模型 {self.model_name} 的专用编码器,使用默认编码器")
self.encoding = tiktoken.get_encoding("cl100k_base")
# # 初始化token计数器
# try:
# self.encoding = tiktoken.encoding_for_model(self.model_name)
# except KeyError:
# logger.warning(f"未找到模型 {self.model_name} 的专用编码器,使用默认编码器")
# self.encoding = tiktoken.get_encoding("cl100k_base")
def _generate(self, messages: list, params: dict) -> any:
"""实现OpenAI特定的生成逻辑"""

View File

@ -197,6 +197,28 @@ def time_convert_seconds_to_hmsm(seconds) -> str:
return "{:02d}:{:02d}:{:02d},{:03d}".format(hours, minutes, seconds, milliseconds)
def format_time(seconds: float) -> str:
"""
将秒数转换为格式化的时间字符串 (HH:MM:SS,mmm)
参数:
seconds: 需要转换的秒数可以是整数或浮点数
返回:
格式化的时间字符串格式为 HH:MM:SS,mmm
"""
# 计算小时、分钟、秒和毫秒
hours = int(seconds // 3600)
remaining_seconds = seconds % 3600
minutes = int(remaining_seconds // 60)
remaining_seconds = remaining_seconds % 60
secs = int(remaining_seconds)
milliseconds = int((remaining_seconds - secs) * 1000)
# 格式化为时间字符串
return "{:02d}:{:02d}:{:02d},{:03d}".format(hours, minutes, secs, milliseconds)
def text_to_srt(idx: int, msg: str, start_time: float, end_time: float) -> str:
start_time = time_convert_seconds_to_hmsm(start_time)
end_time = time_convert_seconds_to_hmsm(end_time)
@ -303,6 +325,15 @@ def video_dir(sub_dir: str = ""):
return d
def subtitle_dir(sub_dir: str = ""):
d = resource_dir(f"srt")
if sub_dir:
d = os.path.join(d, sub_dir)
if not os.path.exists(d):
os.makedirs(d)
return d
def split_timestamp(timestamp):
"""
拆分时间戳
@ -506,7 +537,7 @@ def cut_video(params, progress_callback=None):
st.session_state['subclip_videos'] = subclip_videos
for i, video_script in enumerate(video_script_list):
try:
video_script['path'] = subclip_videos[video_script['timestamp']]
video_script['path'] = subclip_videos[i+1]
except KeyError as err:
logger.error(f"裁剪视频失败: {err}")

View File

@ -1,237 +1,225 @@
import cv2
import numpy as np
from sklearn.cluster import MiniBatchKMeans
"""
视频帧提取工具
这个模块提供了简单高效的视频帧提取功能主要特点
1. 使用ffmpeg进行视频处理支持硬件加速
2. 按指定时间间隔提取视频关键帧
3. 支持多种视频格式
4. 支持高清视频帧输出
5. 直接从原视频提取高质量关键帧
不依赖OpenCV和sklearn等库只使用ffmpeg作为外部依赖降低了安装和使用的复杂度
"""
import os
import re
from typing import List, Tuple, Generator
import time
import subprocess
from typing import List, Dict
from loguru import logger
import gc
from tqdm import tqdm
from app.utils import ffmpeg_utils
class VideoProcessor:
def __init__(self, video_path: str, batch_size: int = 100):
def __init__(self, video_path: str):
"""
初始化视频处理器
Args:
video_path: 视频文件路径
batch_size: 批处理大小控制内存使用
"""
if not os.path.exists(video_path):
raise FileNotFoundError(f"视频文件不存在: {video_path}")
self.video_path = video_path
self.batch_size = batch_size
self.cap = cv2.VideoCapture(video_path)
if not self.cap.isOpened():
raise RuntimeError(f"无法打开视频文件: {video_path}")
self.total_frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
self.fps = int(self.cap.get(cv2.CAP_PROP_FPS))
self.video_info = self._get_video_info()
self.fps = float(self.video_info.get('fps', 25))
self.duration = float(self.video_info.get('duration', 0))
self.width = int(self.video_info.get('width', 0))
self.height = int(self.video_info.get('height', 0))
self.total_frames = int(self.fps * self.duration)
def __del__(self):
"""析构函数,确保视频资源被释放"""
if hasattr(self, 'cap'):
self.cap.release()
gc.collect()
def _get_video_info(self) -> Dict[str, str]:
"""
使用ffprobe获取视频信息
def preprocess_video(self) -> Generator[Tuple[int, np.ndarray], None, None]:
"""
使用生成器方式分批读取视频帧
Yields:
Tuple[int, np.ndarray]: (帧索引, 视频帧)
"""
self.cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
frame_idx = 0
while self.cap.isOpened():
ret, frame = self.cap.read()
if not ret:
break
# 降低分辨率以减少内存使用
frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5)
yield frame_idx, frame
frame_idx += 1
# 定期进行垃圾回收
if frame_idx % 1000 == 0:
gc.collect()
def detect_shot_boundaries(self, threshold: int = 70) -> List[int]:
"""
使用批处理方式检测镜头边界
Args:
threshold: 差异阈值
Returns:
List[int]: 镜头边界帧的索引列表
Dict[str, str]: 包含视频基本信息的字典
"""
shot_boundaries = []
prev_frame = None
prev_idx = -1
pbar = tqdm(self.preprocess_video(),
total=self.total_frames,
desc="检测镜头边界",
unit="")
for frame_idx, curr_frame in pbar:
if prev_frame is not None:
prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)
curr_gray = cv2.cvtColor(curr_frame, cv2.COLOR_BGR2GRAY)
diff = np.mean(np.abs(curr_gray.astype(float) - prev_gray.astype(float)))
if diff > threshold:
shot_boundaries.append(frame_idx)
pbar.set_postfix({"检测到边界": len(shot_boundaries)})
prev_frame = curr_frame.copy()
prev_idx = frame_idx
del curr_frame
if frame_idx % 100 == 0:
gc.collect()
return shot_boundaries
cmd = [
"ffprobe",
"-v", "error",
"-select_streams", "v:0",
"-show_entries", "stream=width,height,r_frame_rate,duration",
"-of", "default=noprint_wrappers=1:nokey=0",
self.video_path
]
def process_shot(self, shot_frames: List[Tuple[int, np.ndarray]]) -> Tuple[np.ndarray, int]:
"""
处理单个镜头的帧
Args:
shot_frames: 镜头中的帧列表
Returns:
Tuple[np.ndarray, int]: (关键帧, 帧索引)
"""
if not shot_frames:
return None, -1
frame_features = []
frame_indices = []
for idx, frame in tqdm(shot_frames,
desc="处理镜头帧",
unit="",
leave=False):
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
resized_gray = cv2.resize(gray, (32, 32))
frame_features.append(resized_gray.flatten())
frame_indices.append(idx)
frame_features = np.array(frame_features)
kmeans = MiniBatchKMeans(n_clusters=1, batch_size=min(len(frame_features), 100),
random_state=0).fit(frame_features)
center_idx = np.argmin(np.sum((frame_features - kmeans.cluster_centers_[0]) ** 2, axis=1))
return shot_frames[center_idx][1], frame_indices[center_idx]
try:
result = subprocess.run(cmd, capture_output=True, text=True, check=True)
lines = result.stdout.strip().split('\n')
info = {}
for line in lines:
if '=' in line:
key, value = line.split('=', 1)
info[key] = value
def extract_keyframes(self, shot_boundaries: List[int]) -> Generator[Tuple[np.ndarray, int], None, None]:
"""
使用生成器方式提取关键帧
Args:
shot_boundaries: 镜头边界列表
Yields:
Tuple[np.ndarray, int]: (关键帧, 帧索引)
"""
shot_frames = []
current_shot_start = 0
for frame_idx, frame in self.preprocess_video():
if frame_idx in shot_boundaries:
if shot_frames:
keyframe, keyframe_idx = self.process_shot(shot_frames)
if keyframe is not None:
yield keyframe, keyframe_idx
# 清理内存
shot_frames.clear()
gc.collect()
current_shot_start = frame_idx
shot_frames.append((frame_idx, frame))
# 控制单个镜头的最大帧数
if len(shot_frames) > self.batch_size:
keyframe, keyframe_idx = self.process_shot(shot_frames)
if keyframe is not None:
yield keyframe, keyframe_idx
shot_frames.clear()
gc.collect()
# 处理最后一个镜头
if shot_frames:
keyframe, keyframe_idx = self.process_shot(shot_frames)
if keyframe is not None:
yield keyframe, keyframe_idx
# 处理帧率(可能是分数形式)
if 'r_frame_rate' in info:
try:
num, den = map(int, info['r_frame_rate'].split('/'))
info['fps'] = str(num / den)
except ValueError:
info['fps'] = info.get('r_frame_rate', '25')
def process_video(self, output_dir: str, skip_seconds: float = 0) -> None:
return info
except subprocess.CalledProcessError as e:
logger.error(f"获取视频信息失败: {e.stderr}")
return {
'width': '1280',
'height': '720',
'fps': '25',
'duration': '0'
}
def extract_frames_by_interval(self, output_dir: str, interval_seconds: float = 5.0,
use_hw_accel: bool = True) -> List[int]:
"""
处理视频并提取关键帧使用分批处理方式
按指定时间间隔提取视频帧
Args:
output_dir: 输出目录
skip_seconds: 跳过视频开头的秒数
interval_seconds: 帧提取间隔
use_hw_accel: 是否使用硬件加速
Returns:
List[int]: 提取的帧号列表
"""
if not os.path.exists(output_dir):
os.makedirs(output_dir)
# 计算起始时间和帧提取点
start_time = 0
end_time = self.duration
extraction_times = []
current_time = start_time
while current_time < end_time:
extraction_times.append(current_time)
current_time += interval_seconds
if not extraction_times:
logger.warning("未找到需要提取的帧")
return []
# 确定硬件加速器选项
hw_accel = []
if use_hw_accel and ffmpeg_utils.is_ffmpeg_hwaccel_available():
hw_accel = ffmpeg_utils.get_ffmpeg_hwaccel_args()
# 提取帧
frame_numbers = []
for i, timestamp in enumerate(tqdm(extraction_times, desc="提取视频帧")):
frame_number = int(timestamp * self.fps)
frame_numbers.append(frame_number)
# 格式化时间戳字符串 (HHMMSSmmm)
hours = int(timestamp // 3600)
minutes = int((timestamp % 3600) // 60)
seconds = int(timestamp % 60)
milliseconds = int((timestamp % 1) * 1000)
time_str = f"{hours:02d}{minutes:02d}{seconds:02d}{milliseconds:03d}"
output_path = os.path.join(output_dir, f"keyframe_{frame_number:06d}_{time_str}.jpg")
# 使用ffmpeg提取单帧
cmd = [
"ffmpeg",
"-hide_banner",
"-loglevel", "error",
]
# 添加硬件加速参数
cmd.extend(hw_accel)
cmd.extend([
"-ss", str(timestamp),
"-i", self.video_path,
"-vframes", "1",
"-q:v", "1", # 最高质量
"-y",
output_path
])
try:
subprocess.run(cmd, check=True, capture_output=True)
except subprocess.CalledProcessError as e:
logger.warning(f"提取帧 {frame_number} 失败: {e.stderr}")
logger.info(f"成功提取了 {len(frame_numbers)} 个视频帧")
return frame_numbers
def _detect_hw_accelerator(self) -> List[str]:
"""
检测系统可用的硬件加速器
Returns:
List[str]: 硬件加速器ffmpeg命令参数
"""
# 使用集中式硬件加速检测
if ffmpeg_utils.is_ffmpeg_hwaccel_available():
return ffmpeg_utils.get_ffmpeg_hwaccel_args()
return []
def process_video_pipeline(self,
output_dir: str,
interval_seconds: float = 5.0, # 帧提取间隔(秒)
use_hw_accel: bool = True) -> None:
"""
执行简化的视频处理流程直接从原视频按固定时间间隔提取帧
Args:
output_dir: 输出目录
interval_seconds: 帧提取间隔
use_hw_accel: 是否使用硬件加速
"""
# 创建输出目录
os.makedirs(output_dir, exist_ok=True)
try:
# 创建输出目录
os.makedirs(output_dir, exist_ok=True)
# 计算要跳过的帧数
skip_frames = int(skip_seconds * self.fps)
self.cap.set(cv2.CAP_PROP_POS_FRAMES, skip_frames)
# 检测镜头边界
logger.info("开始检测镜头边界...")
shot_boundaries = self.detect_shot_boundaries()
# 提取关键帧
logger.info("开始提取关键帧...")
frame_count = 0
pbar = tqdm(self.extract_keyframes(shot_boundaries),
desc="提取关键帧",
unit="")
for keyframe, frame_idx in pbar:
if frame_idx < skip_frames:
continue
# 计算时间戳
timestamp = frame_idx / self.fps
hours = int(timestamp // 3600)
minutes = int((timestamp % 3600) // 60)
seconds = int(timestamp % 60)
time_str = f"{hours:02d}{minutes:02d}{seconds:02d}"
# 保存关键帧
output_path = os.path.join(output_dir,
f'keyframe_{frame_idx:06d}_{time_str}.jpg')
cv2.imwrite(output_path, keyframe)
frame_count += 1
pbar.set_postfix({"已保存": frame_count})
if frame_count % 10 == 0:
gc.collect()
logger.info(f"关键帧提取完成,共保存 {frame_count} 帧到 {output_dir}")
# 直接从原视频提取关键帧
logger.info(f"从视频间隔 {interval_seconds} 秒提取关键帧...")
self.extract_frames_by_interval(
output_dir,
interval_seconds=interval_seconds,
use_hw_accel=use_hw_accel
)
logger.info(f"处理完成!视频帧已保存在: {output_dir}")
except Exception as e:
logger.error(f"视频处理失败: {str(e)}")
import traceback
logger.error(f"视频处理失败: \n{traceback.format_exc()}")
raise
finally:
# 确保资源被释放
self.cap.release()
gc.collect()
if __name__ == "__main__":
import time
start_time = time.time()
# 使用示例
processor = VideoProcessor("./resource/videos/test.mp4")
# 设置间隔为3秒提取帧
processor.process_video_pipeline(
output_dir="output",
interval_seconds=3.0,
use_hw_accel=True
)
end_time = time.time()
print(f"处理完成!总耗时: {end_time - start_time:.2f}")

View File

@ -1,382 +0,0 @@
import cv2
import numpy as np
from sklearn.cluster import KMeans
import os
import re
from typing import List, Tuple, Generator
from loguru import logger
import subprocess
from tqdm import tqdm
class VideoProcessor:
def __init__(self, video_path: str):
"""
初始化视频处理器
Args:
video_path: 视频文件路径
"""
if not os.path.exists(video_path):
raise FileNotFoundError(f"视频文件不存在: {video_path}")
self.video_path = video_path
self.cap = cv2.VideoCapture(video_path)
if not self.cap.isOpened():
raise RuntimeError(f"无法打开视频文件: {video_path}")
self.total_frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
self.fps = int(self.cap.get(cv2.CAP_PROP_FPS))
def __del__(self):
"""析构函数,确保视频资源被释放"""
if hasattr(self, 'cap'):
self.cap.release()
def preprocess_video(self) -> Generator[np.ndarray, None, None]:
"""
使用生成器方式读取视频帧
Yields:
np.ndarray: 视频帧
"""
self.cap.set(cv2.CAP_PROP_POS_FRAMES, 0) # 重置到视频开始
while self.cap.isOpened():
ret, frame = self.cap.read()
if not ret:
break
yield frame
def detect_shot_boundaries(self, frames: List[np.ndarray], threshold: int = 30) -> List[int]:
"""
使用帧差法检测镜头边界
Args:
frames: 视频帧列表
threshold: 差异阈值默认值调低为30
Returns:
List[int]: 镜头边界帧的索引列表
"""
shot_boundaries = []
if len(frames) < 2: # 添加帧数检查
logger.warning("视频帧数过少,无法检测场景边界")
return [len(frames) - 1] # 返回最后一帧作为边界
for i in range(1, len(frames)):
prev_frame = cv2.cvtColor(frames[i - 1], cv2.COLOR_BGR2GRAY)
curr_frame = cv2.cvtColor(frames[i], cv2.COLOR_BGR2GRAY)
# 计算帧差
diff = np.mean(np.abs(curr_frame.astype(float) - prev_frame.astype(float)))
if diff > threshold:
shot_boundaries.append(i)
# 如果没有检测到任何边界,至少返回最后一帧
if not shot_boundaries:
logger.warning("未检测到场景边界,将视频作为单个场景处理")
shot_boundaries.append(len(frames) - 1)
return shot_boundaries
def extract_keyframes(self, frames: List[np.ndarray], shot_boundaries: List[int]) -> Tuple[
List[np.ndarray], List[int]]:
"""
从每个镜头中提取关键帧
Args:
frames: 视频帧列表
shot_boundaries: 镜头边界列表
Returns:
Tuple[List[np.ndarray], List[int]]: 关键帧列表和对应的帧索引
"""
keyframes = []
keyframe_indices = []
for i in tqdm(range(len(shot_boundaries)), desc="提取关键帧"):
start = shot_boundaries[i - 1] if i > 0 else 0
end = shot_boundaries[i]
shot_frames = frames[start:end]
if not shot_frames:
continue
# 将每一帧转换为灰度图并展平为一维数组
frame_features = np.array([cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY).flatten()
for frame in shot_frames])
try:
# 尝试使用 KMeans
kmeans = KMeans(n_clusters=1, random_state=0).fit(frame_features)
center_idx = np.argmin(np.sum((frame_features - kmeans.cluster_centers_[0]) ** 2, axis=1))
except Exception as e:
logger.warning(f"KMeans 聚类失败,使用备选方案: {str(e)}")
# 备选方案:选择镜头中间的帧作为关键帧
center_idx = len(shot_frames) // 2
keyframes.append(shot_frames[center_idx])
keyframe_indices.append(start + center_idx)
return keyframes, keyframe_indices
def save_keyframes(self, keyframes: List[np.ndarray], keyframe_indices: List[int],
output_dir: str, desc: str = "保存关键帧") -> None:
"""
保存关键帧到指定目录文件名格式为keyframe_帧序号_时间戳.jpg
时间戳精确到毫秒格式为HHMMSSmmm
"""
if not os.path.exists(output_dir):
os.makedirs(output_dir)
for keyframe, frame_idx in tqdm(zip(keyframes, keyframe_indices),
total=len(keyframes),
desc=desc):
# 计算精确到毫秒的时间戳
timestamp = frame_idx / self.fps
hours = int(timestamp // 3600)
minutes = int((timestamp % 3600) // 60)
seconds = int(timestamp % 60)
milliseconds = int((timestamp % 1) * 1000) # 计算毫秒部分
time_str = f"{hours:02d}{minutes:02d}{seconds:02d}{milliseconds:03d}"
output_path = os.path.join(output_dir,
f'keyframe_{frame_idx:06d}_{time_str}.jpg')
cv2.imwrite(output_path, keyframe)
def extract_frames_by_numbers(self, frame_numbers: List[int], output_folder: str) -> None:
"""
根据指定的帧号提取帧如果多个帧在同一毫秒内只保留一个
"""
if not frame_numbers:
raise ValueError("未提供帧号列表")
if any(fn >= self.total_frames or fn < 0 for fn in frame_numbers):
raise ValueError("存在无效的帧号")
if not os.path.exists(output_folder):
os.makedirs(output_folder)
# 用于记录已处理的时间戳(毫秒)
processed_timestamps = set()
for frame_number in tqdm(frame_numbers, desc="提取高清帧"):
# 计算精确到毫秒的时间戳
timestamp = frame_number / self.fps
timestamp_ms = int(timestamp * 1000) # 转换为毫秒
# 如果这一毫秒已经处理过,跳过
if timestamp_ms in processed_timestamps:
continue
self.cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
ret, frame = self.cap.read()
if ret:
# 记录这一毫秒已经处理
processed_timestamps.add(timestamp_ms)
# 计算时间戳字符串
hours = int(timestamp // 3600)
minutes = int((timestamp % 3600) // 60)
seconds = int(timestamp % 60)
milliseconds = int((timestamp % 1) * 1000) # 计算毫秒部分
time_str = f"{hours:02d}{minutes:02d}{seconds:02d}{milliseconds:03d}"
output_path = os.path.join(output_folder,
f"keyframe_{frame_number:06d}_{time_str}.jpg")
cv2.imwrite(output_path, frame)
else:
logger.info(f"无法读取帧 {frame_number}")
logger.info(f"共提取了 {len(processed_timestamps)} 个不同时间戳的帧")
@staticmethod
def extract_numbers_from_folder(folder_path: str) -> List[int]:
"""
从文件夹中提取帧号
Args:
folder_path: 关键帧文件夹路径
Returns:
List[int]: 排序后的帧号列表
"""
files = [f for f in os.listdir(folder_path) if f.endswith('.jpg')]
# 更新正则表达式以匹配新的文件名格式keyframe_000123_010534123.jpg
pattern = re.compile(r'keyframe_(\d+)_\d{9}\.jpg$')
numbers = []
for f in files:
match = pattern.search(f)
if match:
numbers.append(int(match.group(1)))
else:
logger.warning(f"文件名格式不匹配: {f}")
if not numbers:
logger.error(f"在目录 {folder_path} 中未找到有效的关键帧文件")
return sorted(numbers)
def process_video(self, output_dir: str, skip_seconds: float = 0, threshold: int = 30) -> None:
"""
处理视频并提取关键帧
Args:
output_dir: 输出目录
skip_seconds: 跳过视频开头的秒数
"""
skip_frames = int(skip_seconds * self.fps)
logger.info("读取视频帧...")
frames = []
for frame in tqdm(self.preprocess_video(),
total=self.total_frames,
desc="读取视频"):
frames.append(frame)
frames = frames[skip_frames:]
if not frames:
raise ValueError(f"跳过 {skip_seconds} 秒后没有剩余帧可以处理")
logger.info("检测场景边界...")
shot_boundaries = self.detect_shot_boundaries(frames, threshold)
logger.info(f"检测到 {len(shot_boundaries)} 个场景边界")
keyframes, keyframe_indices = self.extract_keyframes(frames, shot_boundaries)
adjusted_indices = [idx + skip_frames for idx in keyframe_indices]
self.save_keyframes(keyframes, adjusted_indices, output_dir, desc="保存压缩关键帧")
def process_video_pipeline(self,
output_dir: str,
skip_seconds: float = 0,
threshold: int = 20, # 降低默认阈值
compressed_width: int = 320,
keep_temp: bool = False) -> None:
"""
执行完整的视频处理流程
Args:
threshold: 降低默认阈值为20使场景检测更敏感
"""
os.makedirs(output_dir, exist_ok=True)
temp_dir = os.path.join(output_dir, 'temp')
compressed_dir = os.path.join(temp_dir, 'compressed')
mini_frames_dir = os.path.join(temp_dir, 'mini_frames')
hd_frames_dir = output_dir
os.makedirs(temp_dir, exist_ok=True)
os.makedirs(compressed_dir, exist_ok=True)
os.makedirs(mini_frames_dir, exist_ok=True)
os.makedirs(hd_frames_dir, exist_ok=True)
mini_processor = None
compressed_video = None
try:
# 1. 压缩视频
video_name = os.path.splitext(os.path.basename(self.video_path))[0]
compressed_video = os.path.join(compressed_dir, f"{video_name}_compressed.mp4")
# 获取原始视频的宽度和高度
original_width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
original_height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
logger.info("步骤1: 压缩视频...")
if original_width > original_height:
# 横版视频
scale_filter = f'scale={compressed_width}:-1'
else:
# 竖版视频
scale_filter = f'scale=-1:{compressed_width}'
ffmpeg_cmd = [
'ffmpeg', '-i', self.video_path,
'-vf', scale_filter,
'-y',
compressed_video
]
try:
subprocess.run(ffmpeg_cmd, check=True, capture_output=True, text=True)
except subprocess.CalledProcessError as e:
logger.error(f"FFmpeg 错误输出: {e.stderr}")
raise
# 2. 从压缩视频中提取关键帧
logger.info("\n步骤2: 从压缩视频提取关键帧...")
mini_processor = VideoProcessor(compressed_video)
mini_processor.process_video(mini_frames_dir, skip_seconds, threshold)
# 3. 从原始视频提取高清关键帧
logger.info("\n步骤3: 提取高清关键帧...")
frame_numbers = self.extract_numbers_from_folder(mini_frames_dir)
if not frame_numbers:
raise ValueError("未能从压缩视频中提取到有效的关键帧")
self.extract_frames_by_numbers(frame_numbers, hd_frames_dir)
logger.info(f"处理完成!高清关键帧保存在: {hd_frames_dir}")
except Exception as e:
import traceback
logger.error(f"视频处理失败: \n{traceback.format_exc()}")
raise
finally:
# 释放资源
if mini_processor:
mini_processor.cap.release()
del mini_processor
# 确保视频文件句柄被释放
if hasattr(self, 'cap'):
self.cap.release()
# 等待资源释放
import time
time.sleep(0.5)
if not keep_temp:
try:
# 先删除压缩视频文件
if compressed_video and os.path.exists(compressed_video):
try:
os.remove(compressed_video)
except Exception as e:
logger.warning(f"删除压缩视频失败: {e}")
# 再删除临时目录
import shutil
if os.path.exists(temp_dir):
max_retries = 3
for i in range(max_retries):
try:
shutil.rmtree(temp_dir)
break
except Exception as e:
if i == max_retries - 1:
logger.warning(f"清理临时文件失败: {e}")
else:
time.sleep(1) # 等待1秒后重试
continue
logger.info("临时文件已清理")
except Exception as e:
logger.warning(f"清理临时文件时出错: {e}")
if __name__ == "__main__":
import time
start_time = time.time()
processor = VideoProcessor("E:\\projects\\NarratoAI\\resource\\videos\\test.mp4")
processor.process_video_pipeline(output_dir="output")
end_time = time.time()
print(f"处理完成!总耗时: {end_time - start_time:.2f}")

View File

@ -1,184 +1,89 @@
[app]
project_version="0.5.3"
project_version="0.6.5"
# 支持视频理解的大模型提供商
# gemini
# qwenvl
vision_llm_provider="qwenvl"
vision_analysis_prompt = "你是资深视频内容分析专家,擅长分析视频画面信息,分析下面视频画面内容,只输出客观的画面描述不要给任何总结或评价"
# gemini (谷歌, 需要 VPN)
# siliconflow (硅基流动)
# qwenvl (通义千问)
vision_llm_provider="Siliconflow"
########## Vision Gemini API Key
########## Gemini 视觉模型
vision_gemini_api_key = ""
vision_gemini_model_name = "gemini-2.0-flash"
vision_gemini_model_name = "gemini-2.0-flash-lite"
########## Vision Qwen API Key (默认使用“硅基流动”的QwenVL模型)
########## QwenVL 视觉模型
vision_qwenvl_api_key = ""
vision_qwenvl_model_name = "Qwen/Qwen2.5-VL-32B-Instruct"
vision_qwenvl_base_url = "https://api.siliconflow.cn/v1"
vision_qwenvl_model_name = "qwen2.5-vl-32b-instruct"
vision_qwenvl_base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"
########### Vision NarratoAPI Key
narrato_api_key = "ggyY91BAO-_ULvAqKum3XexcyN1G3dP86DEzvjZDcrg"
narrato_api_url = "https://narratoinsight.scsmtech.cn/api/v1"
narrato_vision_model = "gemini-1.5-flash"
narrato_vision_key = ""
narrato_llm_model = "gpt-4o"
narrato_llm_key = ""
########## siliconflow 视觉模型
vision_siliconflow_api_key = ""
vision_siliconflow_model_name = "Qwen/Qwen2.5-VL-32B-Instruct"
vision_siliconflow_base_url = "https://api.siliconflow.cn/v1"
########## OpenAI 视觉模型
vision_openai_api_key = ""
vision_openai_model_name = "gpt-4.1-nano-2025-04-14"
vision_openai_base_url = "https://api.openai.com/v1"
########### NarratoAPI 微调模型 (未发布)
narrato_api_key = ""
narrato_api_url = ""
narrato_model = "narra-1.0-2025-05-09"
# 用于生成文案的大模型支持的提供商 (Supported providers):
# openai (默认)
# deepseek (默认使用“硅基流动”的模型)
# moonshot (月之暗面)
# openai (默认, 需要 VPN)
# siliconflow (硅基流动)
# deepseek (深度求索)
# gemini (谷歌, 需要 VPN)
# qwen (通义千问)
# gemini
text_llm_provider="deepseek"
# moonshot (月之暗面)
text_llm_provider="openai"
########## OpenAI API Key
# Get your API key at https://platform.openai.com/api-keys
text_openai_api_key = ""
text_openai_base_url = "https://api.openai.com/v1"
text_openai_model_name = "gpt-4o-mini"
text_openai_model_name = "gpt-4.1-mini-2025-04-14"
########## SiliconFlow API Key
# 使用 硅基流动 第三方 API Key使用手机号注册https://cloud.siliconflow.cn/i/pyOKqFCV
# 访问 https://cloud.siliconflow.cn/account/ak 获取你的 API 密钥
text_siliconflow_api_key = ""
text_siliconflow_base_url = "https://api.siliconflow.cn/v1"
text_siliconflow_model_name = "deepseek-ai/DeepSeek-R1"
########## DeepSeek API Key
# 访问 https://platform.deepseek.com/api_keys 获取你的 API 密钥
# 注意: 若想通过硅基流动调用 DeepSeek 模型, 请使用上方 siliconflow 配置项
text_deepseek_api_key = ""
text_deepseek_base_url = "https://api.deepseek.com"
text_deepseek_model_name = "deepseek-chat"
########## Moonshot API Key
# Visit https://platform.moonshot.cn/console/api-keys to get your API key.
text_moonshot_api_key = ""
text_moonshot_base_url = "https://api.moonshot.cn/v1"
text_moonshot_model_name = "moonshot-v1-8k"
########## G4F
# Visit https://github.com/xtekky/gpt4free to get more details
# Supported model list: https://github.com/xtekky/gpt4free/blob/main/g4f/models.py
text_g4f_model_name = "gpt-3.5-turbo"
########## Azure API Key
# Visit https://learn.microsoft.com/zh-cn/azure/ai-services/openai/ to get more details
# API documentation: https://learn.microsoft.com/zh-cn/azure/ai-services/openai/reference
text_azure_api_key = ""
text_azure_base_url = ""
text_azure_model_name = "gpt-35-turbo" # replace with your model deployment name
text_azure_api_version = "2024-02-15-preview"
########## Gemini API Key
text_gemini_api_key = ""
text_gemini_model_name = "gemini-2.0-flash"
text_gemini_base_url = "https://generativelanguage.googleapis.com/v1beta/openai"
########## Qwen API Key
# Visit https://dashscope.console.aliyun.com/apiKey to get your API key
# Visit below links to get more details
# https://tongyi.aliyun.com/qianwen/
# https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction
# 访问 https://bailian.console.aliyun.com/?tab=model#/api-key 获取你的 API 密钥
text_qwen_api_key = ""
text_qwen_model_name = "qwen-plus-1127"
text_qwen_base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"
# 字幕提供商、可选,支持 whisper 和 faster-whisper-large-v2"whisper"
# 默认为 faster-whisper-large-v2 模型地址https://huggingface.co/guillaumekln/faster-whisper-large-v2
subtitle_provider = "faster-whisper-large-v2"
subtitle_enabled = true
# ImageMagick
# 安装后,将自动检测到 ImageMagickWindows 除外!
# 例如,在 Windows 上 "C:\Program Files (x86)\ImageMagick-7.1.1-Q16-HDRI\magick.exe"
# 下载位置 https://imagemagick.org/archive/binaries/ImageMagick-7.1.1-29-Q16-x64-static.exe
# imagemagick_path = "C:\\Program Files (x86)\\ImageMagick-7.1.1-Q16\\magick.exe"
# FFMPEG
#
# 通常情况下ffmpeg 会被自动下载,并且会被自动检测到。
# 但是如果你的环境有问题,无法自动下载,可能会遇到如下错误:
# RuntimeError: No ffmpeg exe could be found.
# Install ffmpeg on your system, or set the IMAGEIO_FFMPEG_EXE environment variable.
# 此时你可以手动下载 ffmpeg 并设置 ffmpeg_path下载地址https://www.gyan.dev/ffmpeg/builds/
# ffmpeg_path = "C:\\Users\\harry\\Downloads\\ffmpeg.exe"
#########################################################################################
# 当视频生成成功后API服务提供的视频下载接入点默认为当前服务的地址和监听端口
# 比如 http://127.0.0.1:8080/tasks/6357f542-a4e1-46a1-b4c9-bf3bd0df5285/final-1.mp4
# 如果你需要使用域名对外提供服务一般会用nginx做代理则可以设置为你的域名
# 比如 https://xxxx.com/tasks/6357f542-a4e1-46a1-b4c9-bf3bd0df5285/final-1.mp4
# When the video is successfully generated, the API service provides a download endpoint for the video, defaulting to the service's current address and listening port.
# For example, http://127.0.0.1:8080/tasks/6357f542-a4e1-46a1-b4c9-bf3bd0df5285/final-1.mp4
# If you need to provide the service externally using a domain name (usually done with nginx as a proxy), you can set it to your domain name.
# For example, https://xxxx.com/tasks/6357f542-a4e1-46a1-b4c9-bf3bd0df5285/final-1.mp4
# endpoint="https://xxxx.com"
endpoint = ""
# Video material storage location
# material_directory = "" # Indicates that video materials will be downloaded to the default folder, the default folder is ./storage/cache_videos under the current project
# material_directory = "/user/harry/videos" # Indicates that video materials will be downloaded to a specified folder
# material_directory = "task" # Indicates that video materials will be downloaded to the current task's folder, this method does not allow sharing of already downloaded video materials
# 视频素材存放位置
# material_directory = "" #表示将视频素材下载到默认的文件夹,默认文件夹为当前项目下的 ./storage/cache_videos
# material_directory = "/user/harry/videos" #表示将视频素材下载到指定的文件夹中
# material_directory = "task" #表示将视频素材下载到当前任务的文件夹中,这种方式无法共享已经下载的视频素材
material_directory = ""
# 用于任务的状态管理
enable_redis = false
redis_host = "localhost"
redis_port = 6379
redis_db = 0
redis_password = ""
# 文生视频时的最大并发任务数
max_concurrent_tasks = 5
# NOTE(review): 此处原有一段重复的 Moonshot API Key 配置text_moonshot_*
# 与上方 Moonshot 段落的键名和取值完全相同TOML 不允许重复定义同一键
# (tomli 解析时会直接报错),故已移除重复段落。
# webui界面是否显示配置项
hide_config = false
[whisper]
# Only effective when subtitle_provider is "whisper"
# Run on GPU with FP16
# model = WhisperModel(model_size, device="cuda", compute_type="float16")
# Run on GPU with INT8
# model = WhisperModel(model_size, device="cuda", compute_type="int8_float16")
# Run on CPU with INT8
# model = WhisperModel(model_size, device="cpu", compute_type="int8")
# recommended model_size: "large-v3"
model_size = "faster-whisper-large-v2"
# 如果要使用 GPU请设置 device="cuda"
# 注意: ctranslate2/faster-whisper 的 device 取值为小写 "cpu" / "cuda" / "auto"
# 原值为大写 "CPU",可能不被识别,已改为小写。
device = "cpu"
compute_type = "int8"
hide_config = true
[proxy]
### Use a proxy to access the Pexels API
### Format: "http://<username>:<password>@<proxy>:<port>"
### Example: "http://user:pass@proxy:1234"
### Doc: https://requests.readthedocs.io/en/latest/user/advanced/#proxies
### NOTE(review): 默认值指向本机 7890 端口(常见的本地代理端口)。
### 若本机没有运行代理服务,保留该默认值可能导致请求失败 —— 请按需清空或修改;
### 代理是否实际生效取决于应用侧的代理开关逻辑,待确认。
http = "http://127.0.0.1:7890"
https = "http://127.0.0.1:7890"
[azure]
# Azure Speech API Key
# Get your API key at https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices
speech_key=""
# Azure Speech 服务所在区域,例如 "eastus"
speech_region=""
# 是否启用 Azure Speech默认关闭
enabled = false
[frames]
# 跳过视频开头的秒数(具体语义以关键帧提取实现为准 —— TODO confirm
skip_seconds = 0
# threshold差异阈值用于判断两个连续帧之间是否发生了场景切换
# 较小的阈值(如 20更敏感能捕捉到细微的场景变化但可能会误判关键帧图片更多
# 较大的阈值(如 40更保守只捕捉明显的场景切换但可能会漏掉渐变场景关键帧图片更少
# 默认值 30在实践中是一个比较平衡的选择
threshold = 30
version = "v2"
# 提取关键帧的间隔时间(秒)
frame_interval_input = 3
# 大模型单次处理的关键帧数量
# NOTE(review): 原文件中该键重复定义(先 5 后 10TOML 不允许重复键,
# 已保留后出现的取值 10。
vision_batch_size = 10

1
project_version Normal file
View File

@ -0,0 +1 @@
0.6.5

View File

@ -2,6 +2,9 @@
## Latest Changes
* docs(README): 更新README. PR [#138](https://github.com/linyqh/NarratoAI/pull/138) by [@linyqh](https://github.com/linyqh).
* Dev 0.6.0. PR [#137](https://github.com/linyqh/NarratoAI/pull/137) by [@linyqh](https://github.com/linyqh).
* Dev 0.6.0 . PR [#134](https://github.com/linyqh/NarratoAI/pull/134) by [@linyqh](https://github.com/linyqh).
* Dev-0.3.9. PR [#73](https://github.com/linyqh/NarratoAI/pull/73) by [@linyqh](https://github.com/linyqh).
* 0.3.9 版本发布. PR [#71](https://github.com/linyqh/NarratoAI/pull/71) by [@linyqh](https://github.com/linyqh).
* docs: add Japanese README. PR [#66](https://github.com/linyqh/NarratoAI/pull/66) by [@eltociear](https://github.com/eltociear).

View File

@ -1,38 +1,47 @@
requests~=2.31.0
moviepy==2.0.0.dev2
faster-whisper~=1.0.1
uvicorn~=0.27.1
fastapi~=0.115.4
tomli~=2.0.1
streamlit~=1.40.0
loguru~=0.7.2
aiohttp~=3.10.10
urllib3~=2.2.1
pydantic~=2.6.3
g4f~=0.3.0.4
dashscope~=1.15.0
google.generativeai>=0.8.3
python-multipart~=0.0.9
redis==5.0.3
opencv-python~=4.10.0.84
# for azure speech
# https://techcommunity.microsoft.com/t5/ai-azure-ai-services-blog/9-more-realistic-ai-voices-for-conversations-now-generally/ba-p/4099471
azure-cognitiveservices-speech~=1.37.0
git-changelog~=2.5.2
watchdog==5.0.2
pydub==0.25.1
psutil>=5.9.0
opencv-python~=4.10.0.84
scikit-learn~=1.5.2
google-generativeai~=0.8.3
pillow==10.3.0
python-dotenv~=1.0.1
openai~=1.53.0
tqdm>=4.66.6
tenacity>=9.0.0
tiktoken==0.8.0
yt-dlp==2024.11.18
pysrt==1.1.2
httpx==0.27.2
transformers==4.47.0
# 必须项
requests~=2.32.0
moviepy==2.1.1
edge-tts==6.1.19
streamlit~=1.45.0
watchdog==6.0.0
loguru~=0.7.3
tomli~=2.2.1
pydub==0.25.1
pysrt==1.1.2
openai~=1.77.0
google-generativeai>=0.8.5
# 待优化项
# opencv-python==4.11.0.86
# scikit-learn==1.6.1
# fastapi~=0.115.4
# uvicorn~=0.27.1
# pydantic~=2.11.4
# faster-whisper~=1.0.1
# tomli~=2.0.1
# aiohttp~=3.10.10
# httpx==0.27.2
# urllib3~=2.2.1
# python-multipart~=0.0.9
# redis==5.0.3
# opencv-python~=4.10.0.84
# azure-cognitiveservices-speech~=1.37.0
# git-changelog~=2.5.2
# watchdog==5.0.2
# pydub==0.25.1
# psutil>=5.9.0
# scikit-learn~=1.5.2
# pillow==10.3.0
# python-dotenv~=1.0.1
# tqdm>=4.66.6
# tenacity>=9.0.0
# tiktoken==0.8.0
# pysrt==1.1.2
# transformers==4.50.0
# yt-dlp==2025.4.30

252
webui.py
View File

@ -1,13 +1,15 @@
import streamlit as st
import os
import sys
from uuid import uuid4
from loguru import logger
from app.config import config
from webui.components import basic_settings, video_settings, audio_settings, subtitle_settings, script_settings, review_settings, merge_settings, system_settings
from webui.utils import cache, file_utils
from webui.components import basic_settings, video_settings, audio_settings, subtitle_settings, script_settings, \
review_settings, merge_settings, system_settings
# from webui.utils import cache, file_utils
from app.utils import utils
from app.utils import ffmpeg_utils
from app.models.schema import VideoClipParams, VideoAspect
from webui.utils.performance import PerformanceMonitor
# 初始化配置 - 必须是第一个 Streamlit 命令
st.set_page_config(
@ -17,7 +19,7 @@ st.set_page_config(
initial_sidebar_state="auto",
menu_items={
"Report a bug": "https://github.com/linyqh/NarratoAI/issues",
'About': f"# NarratoAI:sunglasses: 📽️ \n #### Version: v{config.project_version} \n "
'About': f"# Narrato:blue[AI] :sunglasses: 📽️ \n #### Version: v{config.project_version} \n "
f"自动化影视解说视频详情请移步https://github.com/linyqh/NarratoAI"
},
)
@ -28,6 +30,7 @@ hide_streamlit_style = """
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)
def init_log():
"""初始化日志配置"""
from loguru import logger
@ -35,17 +38,7 @@ def init_log():
_lvl = "DEBUG"
def format_record(record):
# 增加更多需要过滤的警告消息
ignore_messages = [
"Examining the path of torch.classes raised",
"torch.cuda.is_available()",
"CUDA initialization"
]
for msg in ignore_messages:
if msg in record["message"]:
return ""
# 简化日志格式化处理不尝试按特定字符串过滤torch相关内容
file_path = record["file"].path
relative_path = os.path.relpath(file_path, config.root_dir)
record["file"].path = f"./{relative_path}"
@ -57,23 +50,54 @@ def init_log():
'- <level>{message}</>' + "\n"
return _format
# 优化日志过滤器
def log_filter(record):
ignore_messages = [
"Examining the path of torch.classes raised",
"torch.cuda.is_available()",
"CUDA initialization"
]
return not any(msg in record["message"] for msg in ignore_messages)
# 替换为更简单的过滤方式避免在过滤时访问message内容
# 此处先不设置复杂的过滤器,等应用启动后再动态添加
logger.add(
sys.stdout,
level=_lvl,
format=format_record,
colorize=True,
filter=log_filter
colorize=True
)
# 应用启动后,可以再添加更复杂的过滤器
def setup_advanced_filters():
"""在应用完全启动后设置高级过滤器"""
try:
for handler_id in logger._core.handlers:
logger.remove(handler_id)
# 重新添加带有高级过滤的处理器
def advanced_filter(record):
"""更复杂的过滤器,在应用启动后安全使用"""
ignore_messages = [
"Examining the path of torch.classes raised",
"torch.cuda.is_available()",
"CUDA initialization"
]
return not any(msg in record["message"] for msg in ignore_messages)
logger.add(
sys.stdout,
level=_lvl,
format=format_record,
colorize=True,
filter=advanced_filter
)
except Exception as e:
# 如果过滤器设置失败,确保日志仍然可用
logger.add(
sys.stdout,
level=_lvl,
format=format_record,
colorize=True
)
logger.error(f"设置高级日志过滤器失败: {e}")
# 将高级过滤器设置放到启动主逻辑后
import threading
threading.Timer(5.0, setup_advanced_filters).start()
def init_global_state():
"""初始化全局状态"""
if 'video_clip_json' not in st.session_state:
@ -85,6 +109,7 @@ def init_global_state():
if 'subclip_videos' not in st.session_state:
st.session_state['subclip_videos'] = {}
def tr(key):
"""翻译函数"""
i18n_dir = os.path.join(os.path.dirname(__file__), "webui", "i18n")
@ -92,90 +117,107 @@ def tr(key):
loc = locales.get(st.session_state['ui_language'], {})
return loc.get("Translation", {}).get(key, key)
def render_generate_button():
"""渲染生成按钮和处理逻辑"""
if st.button(tr("Generate Video"), use_container_width=True, type="primary"):
from app.services import task as tm
# 重置日志容器和记录
log_container = st.empty()
log_records = []
def log_received(msg):
with log_container:
log_records.append(msg)
st.code("\n".join(log_records))
from loguru import logger
logger.add(log_received)
config.save_config()
task_id = st.session_state.get('task_id')
if not task_id:
st.error(tr("请先裁剪视频"))
return
if not st.session_state.get('video_clip_json_path'):
st.error(tr("脚本文件不能为空"))
return
if not st.session_state.get('video_origin_path'):
st.error(tr("视频文件不能为空"))
return
st.toast(tr("生成视频"))
logger.info(tr("开始生成视频"))
# 获取所有参数
script_params = script_settings.get_script_params()
video_params = video_settings.get_video_params()
audio_params = audio_settings.get_audio_params()
subtitle_params = subtitle_settings.get_subtitle_params()
# 合并所有参数
all_params = {
**script_params,
**video_params,
**audio_params,
**subtitle_params
}
# 创建参数对象
params = VideoClipParams(**all_params)
result = tm.start_subclip(
task_id=task_id,
params=params,
subclip_path_videos=st.session_state['subclip_videos']
)
video_files = result.get("videos", [])
st.success(tr("视生成完成"))
try:
from app.services import task as tm
import torch
# 重置日志容器和记录
log_container = st.empty()
log_records = []
if video_files:
player_cols = st.columns(len(video_files) * 2 + 1)
for i, url in enumerate(video_files):
player_cols[i * 2 + 1].video(url)
except Exception as e:
logger.error(f"播放视频失败: {e}")
def log_received(msg):
with log_container:
log_records.append(msg)
st.code("\n".join(log_records))
# file_utils.open_task_folder(config.root_dir, task_id)
logger.info(tr("视频生成完成"))
from loguru import logger
logger.add(log_received)
config.save_config()
task_id = st.session_state.get('task_id')
if not task_id:
st.error(tr("请先裁剪视频"))
return
if not st.session_state.get('video_clip_json_path'):
st.error(tr("脚本文件不能为空"))
return
if not st.session_state.get('video_origin_path'):
st.error(tr("视频文件不能为空"))
return
st.toast(tr("生成视频"))
logger.info(tr("开始生成视频"))
# 获取所有参数
script_params = script_settings.get_script_params()
video_params = video_settings.get_video_params()
audio_params = audio_settings.get_audio_params()
subtitle_params = subtitle_settings.get_subtitle_params()
# 合并所有参数
all_params = {
**script_params,
**video_params,
**audio_params,
**subtitle_params
}
# 创建参数对象
params = VideoClipParams(**all_params)
result = tm.start_subclip(
task_id=task_id,
params=params,
subclip_path_videos=st.session_state['subclip_videos']
)
video_files = result.get("videos", [])
st.success(tr("视生成完成"))
try:
if video_files:
player_cols = st.columns(len(video_files) * 2 + 1)
for i, url in enumerate(video_files):
player_cols[i * 2 + 1].video(url)
except Exception as e:
logger.error(f"播放视频失败: {e}")
file_utils.open_task_folder(config.root_dir, task_id)
logger.info(tr("视频生成完成"))
finally:
PerformanceMonitor.cleanup_resources()
# 全局变量,记录是否已经打印过硬件加速信息
_HAS_LOGGED_HWACCEL_INFO = False
def main():
"""主函数"""
global _HAS_LOGGED_HWACCEL_INFO
init_log()
init_global_state()
utils.init_resources()
st.title(f"NarratoAI :sunglasses:📽️")
# 检测FFmpeg硬件加速但只打印一次日志
hwaccel_info = ffmpeg_utils.detect_hardware_acceleration()
if not _HAS_LOGGED_HWACCEL_INFO:
if hwaccel_info["available"]:
logger.info(f"FFmpeg硬件加速检测结果: 可用 | 类型: {hwaccel_info['type']} | 编码器: {hwaccel_info['encoder']} | 独立显卡: {hwaccel_info['is_dedicated_gpu']} | 参数: {hwaccel_info['hwaccel_args']}")
else:
logger.warning(f"FFmpeg硬件加速不可用: {hwaccel_info['message']}, 将使用CPU软件编码")
_HAS_LOGGED_HWACCEL_INFO = True
# 仅初始化基本资源避免过早地加载依赖PyTorch的资源
# 检查是否能分解utils.init_resources()为基本资源和高级资源(如依赖PyTorch的资源)
try:
utils.init_resources()
except Exception as e:
logger.warning(f"资源初始化时出现警告: {e}")
st.title(f"Narrato:blue[AI]:sunglasses: 📽️")
st.write(tr("Get Help"))
# 首先渲染不依赖PyTorch的UI部分
# 渲染基础设置面板
basic_settings.render_basic_settings(tr)
# 渲染合并设置
@ -190,14 +232,18 @@ def main():
audio_settings.render_audio_panel(tr)
with panel[2]:
subtitle_settings.render_subtitle_panel(tr)
# 渲染系统设置面板
system_settings.render_system_panel(tr)
# 渲染视频审查面板
review_settings.render_review_panel(tr)
# 渲染生成按钮和处理逻辑
# 放到最后渲染可能使用PyTorch的部分
# 渲染系统设置面板
with panel[2]:
system_settings.render_system_panel(tr)
# 放到最后渲染生成按钮和处理逻辑
render_generate_button()
if __name__ == "__main__":
main()

View File

@ -8,7 +8,7 @@ from webui.components import (
audio_settings,
subtitle_settings
)
from webui.utils import cache, file_utils, performance
from webui.utils import cache, file_utils
__all__ = [
'config',
@ -17,6 +17,5 @@ __all__ = [
'audio_settings',
'subtitle_settings',
'cache',
'file_utils',
'performance'
'file_utils'
]

View File

@ -3,6 +3,7 @@ import os
from uuid import uuid4
from app.config import config
from app.services import voice
from app.models.schema import AudioVolumeDefaults
from app.utils import utils
from webui.utils.cache import get_songs_cache
@ -94,12 +95,12 @@ def render_azure_v2_settings(tr):
def render_voice_parameters(tr):
"""渲染语音参数设置"""
# 音量
# 音量 - 使用统一的默认值
voice_volume = st.slider(
tr("Speech Volume"),
min_value=0.0,
max_value=1.0,
value=1.0,
min_value=AudioVolumeDefaults.MIN_VOLUME,
max_value=AudioVolumeDefaults.MAX_VOLUME,
value=AudioVolumeDefaults.VOICE_VOLUME,
step=0.01,
help=tr("Adjust the volume of the original audio")
)
@ -187,12 +188,12 @@ def render_bgm_settings(tr):
if custom_bgm_file and os.path.exists(custom_bgm_file):
st.session_state['bgm_file'] = custom_bgm_file
# 背景音乐音量
# 背景音乐音量 - 使用统一的默认值
bgm_volume = st.slider(
tr("Background Music Volume"),
min_value=0.0,
max_value=1.0,
value=0.3,
min_value=AudioVolumeDefaults.MIN_VOLUME,
max_value=AudioVolumeDefaults.MAX_VOLUME,
value=AudioVolumeDefaults.BGM_VOLUME,
step=0.01,
help=tr("Adjust the volume of the original audio")
)
@ -203,10 +204,10 @@ def get_audio_params():
"""获取音频参数"""
return {
'voice_name': config.ui.get("voice_name", ""),
'voice_volume': st.session_state.get('voice_volume', 1.0),
'voice_volume': st.session_state.get('voice_volume', AudioVolumeDefaults.VOICE_VOLUME),
'voice_rate': st.session_state.get('voice_rate', 1.0),
'voice_pitch': st.session_state.get('voice_pitch', 1.0),
'bgm_type': st.session_state.get('bgm_type', 'random'),
'bgm_file': st.session_state.get('bgm_file', ''),
'bgm_volume': st.session_state.get('bgm_volume', 0.3),
'bgm_volume': st.session_state.get('bgm_volume', AudioVolumeDefaults.BGM_VOLUME),
}

View File

@ -1,7 +1,10 @@
import traceback
import streamlit as st
import os
from app.config import config
from app.utils import utils
from loguru import logger
def render_basic_settings(tr):
@ -61,25 +64,25 @@ def render_proxy_settings(tr):
proxy_enabled = st.checkbox(tr("Enable Proxy"), value=proxy_enabled)
# 保存代理开关状态
config.proxy["enabled"] = proxy_enabled
# config.proxy["enabled"] = proxy_enabled
# 只有在代理启用时才显示代理设置输入框
if proxy_enabled:
HTTP_PROXY = st.text_input(tr("HTTP_PROXY"), value=proxy_url_http)
HTTPS_PROXY = st.text_input(tr("HTTPs_PROXY"), value=proxy_url_https)
if HTTP_PROXY:
if HTTP_PROXY and HTTPS_PROXY:
config.proxy["http"] = HTTP_PROXY
os.environ["HTTP_PROXY"] = HTTP_PROXY
if HTTPS_PROXY:
config.proxy["https"] = HTTPS_PROXY
os.environ["HTTP_PROXY"] = HTTP_PROXY
os.environ["HTTPS_PROXY"] = HTTPS_PROXY
# logger.debug(f"代理已启用: {HTTP_PROXY}")
else:
# 当代理被禁用时,清除环境变量和配置
os.environ.pop("HTTP_PROXY", None)
os.environ.pop("HTTPS_PROXY", None)
config.proxy["http"] = ""
config.proxy["https"] = ""
# config.proxy["http"] = ""
# config.proxy["https"] = ""
def test_vision_model_connection(api_key, base_url, model_name, provider, tr):
@ -105,29 +108,6 @@ def test_vision_model_connection(api_key, base_url, model_name, provider, tr):
return True, tr("gemini model is available")
except Exception as e:
return False, f"{tr('gemini model is not available')}: {str(e)}"
elif provider.lower() == 'qwenvl':
from openai import OpenAI
try:
client = OpenAI(
api_key=api_key,
base_url=base_url or "https://dashscope.aliyuncs.com/compatible-mode/v1"
)
# 发送一个简单的测试请求
response = client.chat.completions.create(
model=model_name or "qwen-vl-max-latest",
messages=[{"role": "user", "content": "直接回复我文本'当前网络可用'"}]
)
if response and response.choices:
return True, tr("QwenVL model is available")
else:
return False, tr("QwenVL model returned invalid response")
except Exception as e:
return False, f"{tr('QwenVL model is not available')}: {str(e)}"
elif provider.lower() == 'narratoapi':
import requests
try:
@ -145,9 +125,46 @@ def test_vision_model_connection(api_key, base_url, model_name, provider, tr):
return False, f"{tr('NarratoAPI is not available')}: HTTP {response.status_code}"
except Exception as e:
return False, f"{tr('NarratoAPI is not available')}: {str(e)}"
else:
return False, f"{tr('Unsupported provider')}: {provider}"
from openai import OpenAI
try:
client = OpenAI(
api_key=api_key,
base_url=base_url,
)
response = client.chat.completions.create(
model=model_name,
messages=[
{
"role": "system",
"content": [{"type": "text", "text": "You are a helpful assistant."}],
},
{
"role": "user",
"content": [
{
"type": "image_url",
"image_url": {
"url": "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241022/emyrja/dog_and_girl.jpeg"
},
},
{"type": "text", "text": "回复我网络可用即可"},
],
},
],
)
if response and response.choices:
return True, tr("QwenVL model is available")
else:
return False, tr("QwenVL model returned invalid response")
except Exception as e:
# logger.debug(api_key)
# logger.debug(base_url)
# logger.debug(model_name)
return False, f"{tr('QwenVL model is not available')}: {str(e)}"
def render_vision_llm_settings(tr):
@ -155,7 +172,7 @@ def render_vision_llm_settings(tr):
st.subheader(tr("Vision Model Settings"))
# 视频分析模型提供商选择
vision_providers = ['Gemini', 'QwenVL', 'NarratoAPI(待发布)']
vision_providers = ['Siliconflow', 'Gemini', 'QwenVL', 'OpenAI']
saved_vision_provider = config.app.get("vision_llm_provider", "Gemini").lower()
saved_provider_index = 0
@ -191,8 +208,8 @@ def render_vision_llm_settings(tr):
)
st_vision_model_name = st.text_input(
tr("Vision Model Name"),
value=vision_model_name or "gemini-1.5-flash",
help=tr("Default: gemini-1.5-flash")
value=vision_model_name or "gemini-2.0-flash-lite",
help=tr("Default: gemini-2.0-flash-lite")
)
elif vision_provider == 'qwenvl':
st_vision_base_url = st.text_input(
@ -258,53 +275,45 @@ def test_text_model_connection(api_key, base_url, model_name, provider, tr):
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json"
}
# 如果没有指定base_url使用默认值
if not base_url:
if provider.lower() == 'openai':
base_url = "https://api.openai.com/v1"
elif provider.lower() == 'moonshot':
base_url = "https://api.moonshot.cn/v1"
elif provider.lower() == 'deepseek':
base_url = "https://api.deepseek.com/v1"
# 构建测试URL
test_url = f"{base_url.rstrip('/')}/chat/completions"
# 特殊处理Gemini
if provider.lower() == 'gemini':
import google.generativeai as genai
try:
genai.configure(api_key=api_key)
model = genai.GenerativeModel(model_name or 'gemini-pro')
model = genai.GenerativeModel(model_name)
model.generate_content("直接回复我文本'当前网络可用'")
return True, tr("Gemini model is available")
except Exception as e:
return False, f"{tr('Gemini model is not available')}: {str(e)}"
# 构建测试消息
test_data = {
"model": model_name,
"messages": [
{"role": "user", "content": "直接回复我文本'当前网络可用'"}
],
"max_tokens": 10
}
# 发送测试请求
response = requests.post(
test_url,
headers=headers,
json=test_data,
timeout=10
)
if response.status_code == 200:
return True, tr("Text model is available")
else:
return False, f"{tr('Text model is not available')}: HTTP {response.status_code}"
test_url = f"{base_url.rstrip('/')}/chat/completions"
# 构建测试消息
test_data = {
"model": model_name,
"messages": [
{"role": "user", "content": "直接回复我文本'当前网络可用'"}
],
"stream": False
}
# 发送测试请求
response = requests.post(
test_url,
headers=headers,
json=test_data,
)
# logger.debug(model_name)
# logger.debug(api_key)
# logger.debug(test_url)
if response.status_code == 200:
return True, tr("Text model is available")
else:
return False, f"{tr('Text model is not available')}: HTTP {response.status_code}"
except Exception as e:
logger.error(traceback.format_exc())
return False, f"{tr('Connection failed')}: {str(e)}"
@ -313,8 +322,8 @@ def render_text_llm_settings(tr):
st.subheader(tr("Text Generation Model Settings"))
# 文案生成模型提供商选择
text_providers = ['DeepSeek', 'OpenAI', 'Qwen', 'Moonshot', 'Gemini']
saved_text_provider = config.app.get("text_llm_provider", "DeepSeek").lower()
text_providers = ['OpenAI', 'Siliconflow', 'DeepSeek', 'Gemini', 'Qwen', 'Moonshot']
saved_text_provider = config.app.get("text_llm_provider", "OpenAI").lower()
saved_provider_index = 0
for i, provider in enumerate(text_providers):
@ -331,9 +340,9 @@ def render_text_llm_settings(tr):
config.app["text_llm_provider"] = text_provider
# 获取已保存的文本模型配置
text_api_key = config.app.get(f"text_{text_provider}_api_key", "")
text_base_url = config.app.get(f"text_{text_provider}_base_url", "")
text_model_name = config.app.get(f"text_{text_provider}_model_name", "")
text_api_key = config.app.get(f"text_{text_provider}_api_key")
text_base_url = config.app.get(f"text_{text_provider}_base_url")
text_model_name = config.app.get(f"text_{text_provider}_model_name")
# 渲染文本模型配置输入框
st_text_api_key = st.text_input(tr("Text API Key"), value=text_api_key, type="password")
@ -364,11 +373,11 @@ def render_text_llm_settings(tr):
if st_text_model_name:
config.app[f"text_{text_provider}_model_name"] = st_text_model_name
# Cloudflare 特殊配置
if text_provider == 'cloudflare':
st_account_id = st.text_input(
tr("Account ID"),
value=config.app.get(f"text_{text_provider}_account_id", "")
)
if st_account_id:
config.app[f"text_{text_provider}_account_id"] = st_account_id
# # Cloudflare 特殊配置
# if text_provider == 'cloudflare':
# st_account_id = st.text_input(
# tr("Account ID"),
# value=config.app.get(f"text_{text_provider}_account_id", "")
# )
# if st_account_id:
# config.app[f"text_{text_provider}_account_id"] = st_account_id

View File

@ -1,20 +1,13 @@
import os
import time
import math
import sys
import tempfile
import traceback
import shutil
import streamlit as st
from loguru import logger
from typing import List, Dict, Tuple
from typing import List, Dict
from dataclasses import dataclass
from streamlit.runtime.uploaded_file_manager import UploadedFile
from webui.utils.merge_video import merge_videos_and_subtitles
from app.utils.utils import video_dir, srt_dir
from app.services.subtitle import extract_audio_and_create_subtitle
# 定义临时目录路径
TEMP_MERGE_DIR = os.path.join("storage", "temp", "merge")
@ -169,38 +162,38 @@ def render_merge_settings(tr):
else:
st.warning(tr("Missing Subtitle"))
# 如果有视频但没有字幕,显示一键转录按钮
if os.path.exists(video_path):
if st.button(tr("One-Click Transcribe"), key=f"transcribe_{base_name}"):
with st.spinner(tr("Transcribing...")):
try:
# 生成字幕文件
result = extract_audio_and_create_subtitle(video_path, subtitle_path)
if result:
# 读取生成的字幕文件内容并显示预览
with open(subtitle_path, 'r', encoding='utf-8') as f:
subtitle_content = f.read()
st.markdown(tr("Subtitle Preview"))
st.text_area(
"Subtitle Content",
value=subtitle_content,
height=150,
label_visibility="collapsed",
key=f"subtitle_preview_transcribed_{base_name}"
)
st.success(tr("Transcription Complete!"))
# 更新pair的字幕文件路径
pair.subtitle_file = subtitle_path
else:
st.error(tr("Transcription Failed. Please try again."))
except Exception as e:
error_message = str(e)
logger.error(traceback.format_exc())
if "rate limit exceeded" in error_message.lower():
st.error(tr("API rate limit exceeded. Please wait about an hour and try again."))
elif "resource_exhausted" in error_message.lower():
st.error(tr("Resources exhausted. Please try again later."))
else:
st.error(f"{tr('Transcription Failed')}: {str(e)}")
# if os.path.exists(video_path):
# if st.button(tr("One-Click Transcribe"), key=f"transcribe_{base_name}"):
# with st.spinner(tr("Transcribing...")):
# try:
# # 生成字幕文件
# result = extract_audio_and_create_subtitle(video_path, subtitle_path)
# if result:
# # 读取生成的字幕文件内容并显示预览
# with open(subtitle_path, 'r', encoding='utf-8') as f:
# subtitle_content = f.read()
# st.markdown(tr("Subtitle Preview"))
# st.text_area(
# "Subtitle Content",
# value=subtitle_content,
# height=150,
# label_visibility="collapsed",
# key=f"subtitle_preview_transcribed_{base_name}"
# )
# st.success(tr("Transcription Complete!"))
# # 更新pair的字幕文件路径
# pair.subtitle_file = subtitle_path
# else:
# st.error(tr("Transcription Failed. Please try again."))
# except Exception as e:
# error_message = str(e)
# logger.error(traceback.format_exc())
# if "rate limit exceeded" in error_message.lower():
# st.error(tr("API rate limit exceeded. Please wait about an hour and try again."))
# elif "resource_exhausted" in error_message.lower():
# st.error(tr("Resources exhausted. Please try again later."))
# else:
# st.error(f"{tr('Transcription Failed')}: {str(e)}")
# 排序输入框
order = st.number_input(
@ -285,8 +278,8 @@ def render_merge_settings(tr):
error_message = str(e)
if "moviepy" in error_message.lower():
st.error(tr("Error processing video files. Please check if the videos are valid MP4 files."))
elif "pysrt" in error_message.lower():
st.error(tr("Error processing subtitle files. Please check if the subtitles are valid SRT files."))
# elif "pysrt" in error_message.lower():
# st.error(tr("Error processing subtitle files. Please check if the subtitles are valid SRT files."))
else:
st.error(f"{tr('Error during merge')}: {error_message}")

View File

@ -33,7 +33,7 @@ def render_video_item(tr, video_list, subclip_videos, index):
video_script = video_list[index]
# 显示时间戳
timestamp = video_script.get('timestamp', '')
timestamp = video_script.get('_id', '')
st.text_area(
tr("Timestamp"),
value=timestamp,

View File

@ -11,6 +11,7 @@ from app.models.schema import VideoClipParams
from app.utils import utils, check_script
from webui.tools.generate_script_docu import generate_script_docu
from webui.tools.generate_script_short import generate_script_short
from webui.tools.generate_short_summary import generate_script_short_sunmmary
def render_script_panel(tr):
@ -27,15 +28,20 @@ def render_script_panel(tr):
# 获取当前选择的脚本类型
script_path = st.session_state.get('video_clip_json_path', '')
# 根据脚本类型显示不同的布局
if script_path == "short":
# Short Generate模式下显示的内容
render_short_generate_options(tr)
else:
# 其他模式下保持原有布局
# 渲染视频主题和提示词
if script_path == "auto":
# 画面解说
render_video_details(tr)
elif script_path == "short":
# 短剧混剪
render_short_generate_options(tr)
elif script_path == "summary":
# 短剧解说
short_drama_summary(tr)
else:
# 默认为空
pass
# 渲染脚本操作按钮
render_script_buttons(tr, params)
@ -44,10 +50,11 @@ def render_script_panel(tr):
def render_script_file(tr, params):
"""渲染脚本文件选择"""
script_list = [
(tr("None"), ""),
(tr("Auto Generate"), "auto"),
(tr("None"), ""),
(tr("Auto Generate"), "auto"),
(tr("Short Generate"), "short"),
(tr("Upload Script"), "upload_script") # 新增上传脚本选项
(tr("Short Drama Summary"), "summary"),
(tr("Upload Script"), "upload_script")
]
# 获取已有脚本文件
@ -100,11 +107,11 @@ def render_script_file(tr, params):
# 读取上传的JSON内容并验证格式
script_content = uploaded_file.read().decode('utf-8')
json_data = json.loads(script_content)
# 保存到脚本目录
script_file_path = os.path.join(script_dir, uploaded_file.name)
file_name, file_extension = os.path.splitext(uploaded_file.name)
# 如果文件已存在,添加时间戳
if os.path.exists(script_file_path):
timestamp = time.strftime("%Y%m%d%H%M%S")
@ -114,14 +121,14 @@ def render_script_file(tr, params):
# 写入文件
with open(script_file_path, "w", encoding='utf-8') as f:
json.dump(json_data, f, ensure_ascii=False, indent=2)
# 更新状态
st.success(tr("Script Uploaded Successfully"))
st.session_state['video_clip_json_path'] = script_file_path
params.video_clip_json_path = script_file_path
time.sleep(1)
st.rerun()
except json.JSONDecodeError:
st.error(tr("Invalid JSON format"))
except Exception as e:
@ -180,6 +187,7 @@ def render_short_generate_options(tr):
渲染Short Generate模式下的特殊选项
在Short Generate模式下替换原有的输入框为自定义片段选项
"""
short_drama_summary(tr)
# 显示自定义片段数量选择器
custom_clips = st.number_input(
tr("自定义片段"),
@ -193,7 +201,7 @@ def render_short_generate_options(tr):
def render_video_details(tr):
"""渲染视频主题和提示词"""
"""画面解说 渲染视频主题和提示词"""
video_theme = st.text_input(tr("Video Theme"))
custom_prompt = st.text_area(
tr("Generation Prompt"),
@ -201,57 +209,104 @@ def render_video_details(tr):
help=tr("Custom prompt for LLM, leave empty to use default prompt"),
height=180
)
# 非短视频模式下显示原有的三个输入框
input_cols = st.columns(2)
with input_cols[0]:
st.number_input(
tr("Frame Interval (seconds)"),
min_value=0,
value=st.session_state.get('frame_interval_input', config.frames.get('frame_interval_input', 3)),
help=tr("Frame Interval (seconds) (More keyframes consume more tokens)"),
key="frame_interval_input"
)
with input_cols[1]:
st.number_input(
tr("Batch Size"),
min_value=0,
value=st.session_state.get('vision_batch_size', config.frames.get('vision_batch_size', 10)),
help=tr("Batch Size (More keyframes consume more tokens)"),
key="vision_batch_size"
)
st.session_state['video_theme'] = video_theme
st.session_state['custom_prompt'] = custom_prompt
return video_theme, custom_prompt
def short_drama_summary(tr):
"""短剧解说 渲染视频主题和提示词"""
# 检查是否已经处理过字幕文件
if 'subtitle_file_processed' not in st.session_state:
st.session_state['subtitle_file_processed'] = False
subtitle_file = st.file_uploader(
tr("上传字幕文件"),
type=["srt"],
accept_multiple_files=False,
key="subtitle_file_uploader" # 添加唯一key
)
# 显示当前已上传的字幕文件路径
if 'subtitle_path' in st.session_state and st.session_state['subtitle_path']:
st.info(f"已上传字幕: {os.path.basename(st.session_state['subtitle_path'])}")
if st.button(tr("清除已上传字幕")):
st.session_state['subtitle_path'] = None
st.session_state['subtitle_file_processed'] = False
st.rerun()
# 只有当有文件上传且尚未处理时才执行处理逻辑
if subtitle_file is not None and not st.session_state['subtitle_file_processed']:
try:
# 读取上传的SRT内容
script_content = subtitle_file.read().decode('utf-8')
# 保存到字幕目录
script_file_path = os.path.join(utils.subtitle_dir(), subtitle_file.name)
file_name, file_extension = os.path.splitext(subtitle_file.name)
# 如果文件已存在,添加时间戳
if os.path.exists(script_file_path):
timestamp = time.strftime("%Y%m%d%H%M%S")
file_name_with_timestamp = f"{file_name}_{timestamp}"
script_file_path = os.path.join(utils.subtitle_dir(), file_name_with_timestamp + file_extension)
# 直接写入SRT内容不进行JSON转换
with open(script_file_path, "w", encoding='utf-8') as f:
f.write(script_content)
# 更新状态
st.success(tr("字幕上传成功"))
st.session_state['subtitle_path'] = script_file_path
st.session_state['subtitle_file_processed'] = True # 标记已处理
# 避免使用rerun使用更新状态的方式
# st.rerun()
except Exception as e:
st.error(f"{tr('Upload failed')}: {str(e)}")
# 名称输入框
video_theme = st.text_input(tr("短剧名称"))
st.session_state['video_theme'] = video_theme
# 数字输入框
temperature = st.slider("temperature", 0.0, 2.0, 0.7)
st.session_state['temperature'] = temperature
return video_theme
def render_script_buttons(tr, params):
"""渲染脚本操作按钮"""
# 获取当前选择的脚本类型
script_path = st.session_state.get('video_clip_json_path', '')
# 根据脚本类型显示不同的设置
if script_path != "short":
# 非短视频模式下显示原有的三个输入框
input_cols = st.columns(3)
with input_cols[0]:
skip_seconds = st.number_input(
"skip_seconds",
min_value=0,
value=st.session_state.get('skip_seconds', config.frames.get('skip_seconds', 0)),
help=tr("Skip the first few seconds"),
key="skip_seconds_input"
)
st.session_state['skip_seconds'] = skip_seconds
with input_cols[1]:
threshold = st.number_input(
"threshold",
min_value=0,
value=st.session_state.get('threshold', config.frames.get('threshold', 30)),
help=tr("Difference threshold"),
key="threshold_input"
)
st.session_state['threshold'] = threshold
with input_cols[2]:
vision_batch_size = st.number_input(
"vision_batch_size",
min_value=1,
max_value=20,
value=st.session_state.get('vision_batch_size', config.frames.get('vision_batch_size', 5)),
help=tr("Vision processing batch size"),
key="vision_batch_size_input"
)
st.session_state['vision_batch_size'] = vision_batch_size
# 生成/加载按钮
if script_path == "auto":
button_name = tr("Generate Video Script")
elif script_path == "short":
button_name = tr("Generate Short Video Script")
elif script_path == "summary":
button_name = tr("生成短剧解说脚本")
elif script_path.endswith("json"):
button_name = tr("Load Video Script")
else:
@ -259,12 +314,18 @@ def render_script_buttons(tr, params):
if st.button(button_name, key="script_action", disabled=not script_path):
if script_path == "auto":
generate_script_docu(tr, params)
# 执行纪录片视频脚本生成(视频无字幕无配音)
generate_script_docu(params)
elif script_path == "short":
# 获取自定义片段数量参数
custom_clips = st.session_state.get('custom_clips', 5)
# 直接将custom_clips作为参数传递而不是通过params对象
# 执行 短剧混剪 脚本生成
custom_clips = st.session_state.get('custom_clips')
generate_script_short(tr, params, custom_clips)
elif script_path == "summary":
# 执行 短剧解说 脚本生成
subtitle_path = st.session_state.get('subtitle_path')
video_theme = st.session_state.get('video_theme')
temperature = st.session_state.get('temperature')
generate_script_short_sunmmary(params, subtitle_path, video_theme, temperature)
else:
load_script(tr, script_path)
@ -366,12 +427,11 @@ def crop_video(tr, params):
utils.cut_video(params, update_progress)
time.sleep(0.5)
progress_bar.progress(100)
status_text.text("剪完成!")
st.success("视频剪辑成功完成!")
except Exception as e:
st.error(f"剪辑过程中发生错误: {str(e)}")
finally:
time.sleep(2)
time.sleep(1)
progress_bar.empty()
status_text.empty()

View File

@ -127,7 +127,7 @@ def get_subtitle_params():
'font_name': st.session_state.get('font_name', ''),
'font_size': st.session_state.get('font_size', 60),
'text_fore_color': st.session_state.get('text_fore_color', '#FFFFFF'),
'position': st.session_state.get('subtitle_position', 'bottom'),
'subtitle_position': st.session_state.get('subtitle_position', 'bottom'),
'custom_position': st.session_state.get('custom_position', 70.0),
'stroke_color': st.session_state.get('stroke_color', '#000000'),
'stroke_width': st.session_state.get('stroke_width', 1.5),

View File

@ -1,5 +1,5 @@
import streamlit as st
from app.models.schema import VideoClipParams, VideoAspect
from app.models.schema import VideoClipParams, VideoAspect, AudioVolumeDefaults
def render_video_panel(tr):
@ -41,12 +41,12 @@ def render_video_config(tr, params):
)
st.session_state['video_quality'] = video_qualities[quality_index][1]
# 原声音量
# 原声音量 - 使用统一的默认值
params.original_volume = st.slider(
tr("Original Volume"),
min_value=0.0,
max_value=1.0,
value=0.7,
min_value=AudioVolumeDefaults.MIN_VOLUME,
max_value=AudioVolumeDefaults.MAX_VOLUME,
value=AudioVolumeDefaults.ORIGINAL_VOLUME,
step=0.01,
help=tr("Adjust the volume of the original audio")
)
@ -58,5 +58,5 @@ def get_video_params():
return {
'video_aspect': st.session_state.get('video_aspect', VideoAspect.portrait.value),
'video_quality': st.session_state.get('video_quality', '1080p'),
'original_volume': st.session_state.get('original_volume', 0.7)
'original_volume': st.session_state.get('original_volume', AudioVolumeDefaults.ORIGINAL_VOLUME)
}

View File

@ -4,6 +4,21 @@ from loguru import logger
from typing import Dict, Any, Optional
from dataclasses import dataclass
def get_version_from_file():
"""从project_version文件中读取版本号"""
try:
version_file = os.path.join(
os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
"project_version"
)
if os.path.isfile(version_file):
with open(version_file, "r", encoding="utf-8") as f:
return f.read().strip()
return "0.1.0" # 默认版本号
except Exception as e:
logger.error(f"读取版本号文件失败: {str(e)}")
return "0.1.0" # 默认版本号
@dataclass
class WebUIConfig:
"""WebUI配置类"""
@ -16,7 +31,7 @@ class WebUIConfig:
# Azure配置
azure: Dict[str, str] = None
# 项目版本
project_version: str = "0.1.0"
project_version: str = get_version_from_file()
# 项目根目录
root_dir: str = None
# Gemini API Key
@ -71,13 +86,13 @@ def load_config(config_path: Optional[str] = None) -> WebUIConfig:
with open(config_path, "rb") as f:
config_dict = tomli.load(f)
# 创建配置对象
# 创建配置对象,使用从文件读取的版本号
config = WebUIConfig(
ui=config_dict.get("ui", {}),
proxy=config_dict.get("proxy", {}),
app=config_dict.get("app", {}),
azure=config_dict.get("azure", {}),
project_version=config_dict.get("project_version", "0.1.0")
# 不再从配置文件中获取project_version
)
return config
@ -105,13 +120,13 @@ def save_config(config: WebUIConfig, config_path: Optional[str] = None) -> bool:
# 确保目录存在
os.makedirs(os.path.dirname(config_path), exist_ok=True)
# 转换为字典
# 转换为字典,不再保存版本号到配置文件
config_dict = {
"ui": config.ui,
"proxy": config.proxy,
"app": config.app,
"azure": config.azure,
"project_version": config.project_version
"azure": config.azure
# 不再保存project_version到配置文件
}
# 保存配置
@ -153,8 +168,7 @@ def update_config(config_dict: Dict[str, Any]) -> bool:
config.app.update(config_dict["app"])
if "azure" in config_dict:
config.azure.update(config_dict["azure"])
if "project_version" in config_dict:
config.project_version = config_dict["project_version"]
# 不再从配置字典更新project_version
# 保存配置
return save_config(config)

View File

@ -85,6 +85,7 @@
"TTS Provider": "TTS Provider",
"Hide Log": "Hide Log",
"Upload Local Files": "Upload Local Files",
"File Uploaded Successfully": "File Uploaded Successfully"
"File Uploaded Successfully": "File Uploaded Successfully",
"Frame Interval (seconds)": "Frame Interval (seconds) (More keyframes consume more tokens)"
}
}

View File

@ -115,7 +115,6 @@
"Text Generation Model Settings": "文案生成模型设置",
"LLM Model Name": "大语言模型名称",
"LLM Model API Key": "大语言模型 API 密钥",
"Batch Size": "批处理大小",
"Text Model Provider": "文案生成模型提供商",
"Text API Key": "文案生成 API 密钥",
"Text Base URL": "文案生成接口地址",
@ -144,7 +143,7 @@
"Merge All Files": "合并所有文件",
"Merge Function Not Implemented": "合并功能待实现",
"No Matched Pairs Found": "未找到匹配的文件对",
"Missing Subtitle": "缺少对应的字幕文件",
"Missing Subtitle": "缺少对应的字幕文件, 请使用其他软件完成字幕转录,比如剪映等",
"Missing Video": "缺少对应的视频文件",
"All Uploaded Files": "所有上传的文件",
"Order": "排序序号",
@ -192,6 +191,11 @@
"Generate Short Video Script": "AI生成短剧混剪脚本",
"Adjust the volume of the original audio": "调整原始音频的音量",
"Original Volume": "视频音量",
"Auto Generate": "纪录片解说 (画面解说)"
"Auto Generate": "纪录片解说 (画面解说)",
"Frame Interval (seconds)": "帧间隔 (秒)",
"Frame Interval (seconds) (More keyframes consume more tokens)": "帧间隔 (秒) (更多关键帧消耗更多令牌)",
"Batch Size": "批处理大小",
"Batch Size (More keyframes consume more tokens)": "批处理大小, 每批处理越少消耗 token 越多",
"Short Drama Summary": "短剧解说(仅支持 gemini-2.0-flash)"
}
}
}

View File

@ -24,15 +24,13 @@ def create_vision_analyzer(provider, api_key, model, base_url):
"""
if provider == 'gemini':
return gemini_analyzer.VisionAnalyzer(model_name=model, api_key=api_key)
elif provider == 'qwenvl':
else:
# 只传入必要的参数
return qwenvl_analyzer.QwenAnalyzer(
model_name=model,
api_key=api_key,
base_url=base_url
)
else:
raise ValueError(f"不支持的视觉分析提供商: {provider}")
def get_batch_timestamps(batch_files, prev_batch_files=None):
@ -152,7 +150,7 @@ def chekc_video_config(video_params):
session.mount("https://", adapter)
try:
session.post(
f"{config.app.get('narrato_api_url')}/video/config",
f"https://dev.narratoai.cn/api/v1/admin/external-api-config/services",
headers=headers,
json=video_params,
timeout=30,

View File

@ -4,21 +4,20 @@ import json
import time
import asyncio
import traceback
import requests
import streamlit as st
from loguru import logger
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from datetime import datetime
from app.config import config
from app.utils.script_generator import ScriptProcessor
from app.utils import utils, video_processor, video_processor_v2, qwenvl_analyzer
from app.utils import utils, video_processor
from webui.tools.base import create_vision_analyzer, get_batch_files, get_batch_timestamps, chekc_video_config
def generate_script_docu(tr, params):
def generate_script_docu(params):
"""
生成 纪录片 视频脚本
要求: 原视频无字幕无配音
适合场景: 纪录片动物搞笑解说荒野建造等
"""
progress_bar = st.progress(0)
status_text = st.empty()
@ -35,8 +34,9 @@ def generate_script_docu(tr, params):
if not params.video_origin_path:
st.error("请先选择视频文件")
return
# ===================提取键帧===================
"""
1. 提取键帧
"""
update_progress(10, "正在提取关键帧...")
# 创建临时目录用于存储关键帧
@ -64,21 +64,12 @@ def generate_script_docu(tr, params):
os.makedirs(video_keyframes_dir, exist_ok=True)
# 初始化视频处理器
if config.frames.get("version") == "v2":
processor = video_processor_v2.VideoProcessor(params.video_origin_path)
# 处理视频并提取关键帧
processor.process_video_pipeline(
output_dir=video_keyframes_dir,
skip_seconds=st.session_state.get('skip_seconds'),
threshold=st.session_state.get('threshold')
)
else:
processor = video_processor.VideoProcessor(params.video_origin_path)
# 处理视频并提取关键帧
processor.process_video(
output_dir=video_keyframes_dir,
skip_seconds=0
)
processor = video_processor.VideoProcessor(params.video_origin_path)
# 处理视频并提取关键帧
processor.process_video_pipeline(
output_dir=video_keyframes_dir,
interval_seconds=st.session_state.get('frame_interval_input'),
)
# 获取所有关键文件路径
for filename in sorted(os.listdir(video_keyframes_dir)):
@ -101,9 +92,12 @@ def generate_script_docu(tr, params):
raise Exception(f"关键帧提取失败: {str(e)}")
# 根据不同的 LLM 提供商处理
"""
2. 视觉分析(批量分析每一帧)
"""
vision_llm_provider = st.session_state.get('vision_llm_providers').lower()
logger.debug(f"Vision LLM 提供商: {vision_llm_provider}")
llm_params = dict()
logger.debug(f"VLM 视觉大模型提供商: {vision_llm_provider}")
try:
# ===================初始化视觉分析器===================
@ -114,14 +108,18 @@ def generate_script_docu(tr, params):
vision_api_key = st.session_state.get('vision_gemini_api_key')
vision_model = st.session_state.get('vision_gemini_model_name')
vision_base_url = st.session_state.get('vision_gemini_base_url')
elif vision_llm_provider == 'qwenvl':
vision_api_key = st.session_state.get('vision_qwenvl_api_key')
vision_model = st.session_state.get('vision_qwenvl_model_name', 'qwen-vl-max-latest')
vision_base_url = st.session_state.get('vision_qwenvl_base_url')
else:
raise ValueError(f"不支持的视觉分析提供商: {vision_llm_provider}")
vision_api_key = st.session_state.get(f'vision_{vision_llm_provider}_api_key')
vision_model = st.session_state.get(f'vision_{vision_llm_provider}_model_name')
vision_base_url = st.session_state.get(f'vision_{vision_llm_provider}_base_url')
# 创建视觉分析器实例
llm_params = {
"vision_provider": vision_llm_provider,
"vision_api_key": vision_api_key,
"vision_model_name": vision_model,
"vision_base_url": vision_base_url,
}
analyzer = create_vision_analyzer(
provider=vision_llm_provider,
api_key=vision_api_key,
@ -137,111 +135,245 @@ def generate_script_docu(tr, params):
# 执行异步分析
vision_batch_size = st.session_state.get('vision_batch_size') or config.frames.get("vision_batch_size")
vision_analysis_prompt = """
我提供了 %s 张视频帧它们按时间顺序排列代表一个连续的视频片段请仔细分析每一帧的内容并关注帧与帧之间的变化以理解整个片段的活动
首先请详细描述每一帧的关键视觉信息包含主要内容人物动作和场景
然后基于所有帧的分析请用**简洁的语言**总结整个视频片段中发生的主要活动或事件流程
请务必使用 JSON 格式输出你的结果JSON 结构应如下
{
"frame_observations": [
{
"frame_number": 1, // 或其他标识帧的方式
"observation": "描述每张视频帧中的主要内容、人物、动作和场景。"
},
// ... 更多帧的观察 ...
],
"overall_activity_summary": "在这里填写你总结的整个片段的主要活动,保持简洁。"
}
请务必不要遗漏视频帧我提供了 %s 张视频帧frame_observations 必须包含 %s 个元素
请只返回 JSON 字符串不要包含任何其他解释性文字
"""
results = loop.run_until_complete(
analyzer.analyze_images(
images=keyframe_files,
prompt=config.app.get('vision_analysis_prompt'),
prompt=vision_analysis_prompt,
batch_size=vision_batch_size
)
)
loop.close()
"""
3. 处理分析结果格式化为 json 数据
"""
# ===================处理分析结果===================
update_progress(60, "正在整理分析结果...")
# 合并所有批次的析结果
# 合并所有批次的析结果
frame_analysis = ""
merged_frame_observations = [] # 合并所有批次的帧观察
overall_activity_summaries = [] # 合并所有批次的整体总结
prev_batch_files = None
frame_counter = 1 # 初始化帧计数器,用于给所有帧分配连续的序号
# logger.debug(json.dumps(results, indent=4, ensure_ascii=False))
# 确保分析目录存在
analysis_dir = os.path.join(utils.storage_dir(), "temp", "analysis")
os.makedirs(analysis_dir, exist_ok=True)
origin_res = os.path.join(analysis_dir, "frame_analysis.json")
with open(origin_res, 'w', encoding='utf-8') as f:
json.dump(results, f, ensure_ascii=False, indent=2)
# 开始处理
for result in results:
if 'error' in result:
logger.warning(f"批次 {result['batch_index']} 处理出现警告: {result['error']}")
# 获取当前批次的文件列表 keyframe_001136_000045.jpg 将 000045 精度提升到 毫秒
continue
# 获取当前批次的文件列表
batch_files = get_batch_files(keyframe_files, result, vision_batch_size)
logger.debug(f"批次 {result['batch_index']} 处理完成,共 {len(batch_files)} 张图片")
# logger.debug(batch_files)
first_timestamp, last_timestamp, _ = get_batch_timestamps(batch_files, prev_batch_files)
# 获取批次的时间戳范围
first_timestamp, last_timestamp, timestamp_range = get_batch_timestamps(batch_files, prev_batch_files)
logger.debug(f"处理时间戳: {first_timestamp}-{last_timestamp}")
# 添加带时间戳的分析结果
frame_analysis += f"\n=== {first_timestamp}-{last_timestamp} ===\n"
frame_analysis += result['response']
frame_analysis += "\n"
# 解析响应中的JSON数据
response_text = result['response']
try:
# 处理可能包含```json```格式的响应
if "```json" in response_text:
json_content = response_text.split("```json")[1].split("```")[0].strip()
elif "```" in response_text:
json_content = response_text.split("```")[1].split("```")[0].strip()
else:
json_content = response_text.strip()
response_data = json.loads(json_content)
# 提取frame_observations和overall_activity_summary
if "frame_observations" in response_data:
frame_obs = response_data["frame_observations"]
overall_summary = response_data.get("overall_activity_summary", "")
# 添加时间戳信息到每个帧观察
for i, obs in enumerate(frame_obs):
if i < len(batch_files):
# 从文件名中提取时间戳
file_path = batch_files[i]
file_name = os.path.basename(file_path)
# 提取时间戳字符串 (格式如: keyframe_000675_000027000.jpg)
# 格式解析: keyframe_帧序号_毫秒时间戳.jpg
timestamp_parts = file_name.split('_')
if len(timestamp_parts) >= 3:
timestamp_str = timestamp_parts[-1].split('.')[0]
try:
# 修正时间戳解析逻辑
# 格式为000100000表示00:01:00,000即1分钟
# 需要按照对应位数进行解析:
# 前两位是小时,中间两位是分钟,后面是秒和毫秒
if len(timestamp_str) >= 9: # 确保格式正确
hours = int(timestamp_str[0:2])
minutes = int(timestamp_str[2:4])
seconds = int(timestamp_str[4:6])
milliseconds = int(timestamp_str[6:9])
# 计算总秒数
timestamp_seconds = hours * 3600 + minutes * 60 + seconds + milliseconds / 1000
formatted_time = utils.format_time(timestamp_seconds) # 格式化时间戳
else:
# 兼容旧的解析方式
timestamp_seconds = int(timestamp_str) / 1000 # 转换为秒
formatted_time = utils.format_time(timestamp_seconds) # 格式化时间戳
except ValueError:
logger.warning(f"无法解析时间戳: {timestamp_str}")
timestamp_seconds = 0
formatted_time = "00:00:00,000"
else:
logger.warning(f"文件名格式不符合预期: {file_name}")
timestamp_seconds = 0
formatted_time = "00:00:00,000"
# 添加额外信息到帧观察
obs["frame_path"] = file_path
obs["timestamp"] = formatted_time
obs["timestamp_seconds"] = timestamp_seconds
obs["batch_index"] = result['batch_index']
# 使用全局递增的帧计数器替换原始的frame_number
if "frame_number" in obs:
obs["original_frame_number"] = obs["frame_number"] # 保留原始编号作为参考
obs["frame_number"] = frame_counter # 赋值连续的帧编号
frame_counter += 1 # 增加帧计数器
# 添加到合并列表
merged_frame_observations.append(obs)
# 添加批次整体总结信息
if overall_summary:
# 从文件名中提取时间戳数值
first_time_str = first_timestamp.split('_')[-1].split('.')[0]
last_time_str = last_timestamp.split('_')[-1].split('.')[0]
# 转换为毫秒并计算持续时间(秒)
try:
# 修正解析逻辑,与上面相同的方式解析时间戳
if len(first_time_str) >= 9 and len(last_time_str) >= 9:
# 解析第一个时间戳
first_hours = int(first_time_str[0:2])
first_minutes = int(first_time_str[2:4])
first_seconds = int(first_time_str[4:6])
first_ms = int(first_time_str[6:9])
first_time_seconds = first_hours * 3600 + first_minutes * 60 + first_seconds + first_ms / 1000
# 解析第二个时间戳
last_hours = int(last_time_str[0:2])
last_minutes = int(last_time_str[2:4])
last_seconds = int(last_time_str[4:6])
last_ms = int(last_time_str[6:9])
last_time_seconds = last_hours * 3600 + last_minutes * 60 + last_seconds + last_ms / 1000
batch_duration = last_time_seconds - first_time_seconds
else:
# 兼容旧的解析方式
first_time_ms = int(first_time_str)
last_time_ms = int(last_time_str)
batch_duration = (last_time_ms - first_time_ms) / 1000
except ValueError:
# 使用 utils.time_to_seconds 函数处理格式化的时间戳
first_time_seconds = utils.time_to_seconds(first_time_str.replace('_', ':').replace('-', ','))
last_time_seconds = utils.time_to_seconds(last_time_str.replace('_', ':').replace('-', ','))
batch_duration = last_time_seconds - first_time_seconds
overall_activity_summaries.append({
"batch_index": result['batch_index'],
"time_range": f"{first_timestamp}-{last_timestamp}",
"duration_seconds": batch_duration,
"summary": overall_summary
})
except Exception as e:
logger.error(f"解析批次 {result['batch_index']} 的响应数据失败: {str(e)}")
# 添加原始响应作为回退
frame_analysis += f"\n=== {first_timestamp}-{last_timestamp} ===\n"
frame_analysis += response_text
frame_analysis += "\n"
# 更新上一个批次的文件
prev_batch_files = batch_files
# 将合并后的结果转为JSON字符串
merged_results = {
"frame_observations": merged_frame_observations,
"overall_activity_summaries": overall_activity_summaries
}
# 使用当前时间创建文件名
now = datetime.now()
timestamp_str = now.strftime("%Y%m%d_%H%M")
# 保存完整的分析结果为JSON
analysis_filename = f"frame_analysis_{timestamp_str}.json"
analysis_json_path = os.path.join(analysis_dir, analysis_filename)
with open(analysis_json_path, 'w', encoding='utf-8') as f:
json.dump(merged_results, f, ensure_ascii=False, indent=2)
logger.info(f"分析结果已保存到: {analysis_json_path}")
if not frame_analysis.strip():
raise Exception("未能生成有效的帧分析结果")
# 保存分析结果
analysis_path = os.path.join(utils.temp_dir(), "frame_analysis.txt")
with open(analysis_path, 'w', encoding='utf-8') as f:
f.write(frame_analysis)
update_progress(70, "正在生成脚本...")
"""
4. 生成文案
"""
logger.info("开始准备生成解说文案")
update_progress(80, "正在生成文案...")
from app.services.generate_narration_script import parse_frame_analysis_to_markdown, generate_narration
# 从配置中获取文本生成相关配置
text_provider = config.app.get('text_llm_provider', 'gemini').lower()
text_api_key = config.app.get(f'text_{text_provider}_api_key')
text_model = config.app.get(f'text_{text_provider}_model_name')
text_base_url = config.app.get(f'text_{text_provider}_base_url')
# 构建帧内容列表
frame_content_list = []
prev_batch_files = None
for i, result in enumerate(results):
if 'error' in result:
continue
batch_files = get_batch_files(keyframe_files, result, vision_batch_size)
_, _, timestamp_range = get_batch_timestamps(batch_files, prev_batch_files)
frame_content = {
"timestamp": timestamp_range,
"picture": result['response'],
"narration": "",
"OST": 2
}
frame_content_list.append(frame_content)
logger.debug(f"添加帧内容: 时间范围={timestamp_range}, 分析结果长度={len(result['response'])}")
# 更新上一个批次的文件
prev_batch_files = batch_files
if not frame_content_list:
raise Exception("没有有效的帧内容可以处理")
# ===================开始生成文案===================
update_progress(80, "正在生成文案...")
# 校验配置
api_params = {
"vision_api_key": vision_api_key,
"vision_model_name": vision_model,
"vision_base_url": vision_base_url or "",
llm_params.update({
"text_provider": text_provider,
"text_api_key": text_api_key,
"text_model_name": text_model,
"text_base_url": text_base_url or ""
}
chekc_video_config(api_params)
custom_prompt = st.session_state.get('custom_prompt', '')
processor = ScriptProcessor(
model_name=text_model,
api_key=text_api_key,
prompt=custom_prompt,
base_url=text_base_url or "",
video_theme=st.session_state.get('video_theme', '')
"text_base_url": text_base_url
})
chekc_video_config(llm_params)
# 整理帧分析数据
markdown_output = parse_frame_analysis_to_markdown(analysis_json_path)
# 生成解说文案
narration = generate_narration(
markdown_output,
text_api_key,
base_url=text_base_url,
model=text_model
)
# 处理帧内容生成脚本
script_result = processor.process_frames(frame_content_list)
narration_dict = json.loads(narration)['items']
# 为 narration_dict 中每个 item 新增一个 OST: 2 的字段, 代表保留原声和配音
narration_dict = [{**item, "OST": 2} for item in narration_dict]
logger.debug(f"解说文案创作完成:\n{"\n".join([item['narration'] for item in narration_dict])}")
# 结果转换为JSON字符串
script = json.dumps(script_result, ensure_ascii=False, indent=2)
script = json.dumps(narration_dict, ensure_ascii=False, indent=2)
except Exception as e:
logger.exception(f"大模型处理过程中发生错误\n{traceback.format_exc()}")
@ -250,7 +382,7 @@ def generate_script_docu(tr, params):
if script is None:
st.error("生成脚本失败,请检查日志")
st.stop()
logger.info(f"脚本生成完成")
logger.success(f"剪辑脚本生成完成")
if isinstance(script, list):
st.session_state['video_clip_json'] = script
elif isinstance(script, str):

View File

@ -36,9 +36,10 @@ def generate_script_short(tr, params, custom_clips=5):
text_api_key = config.app.get(f'text_{text_provider}_api_key')
text_model = config.app.get(f'text_{text_provider}_model_name')
text_base_url = config.app.get(f'text_{text_provider}_base_url')
vision_api_key = st.session_state.get(f'vision_{text_provider}_api_key', "")
vision_model = st.session_state.get(f'vision_{text_provider}_model_name', "")
vision_base_url = st.session_state.get(f'vision_{text_provider}_base_url', "")
vision_llm_provider = st.session_state.get('vision_llm_providers').lower()
vision_api_key = st.session_state.get(f'vision_{vision_llm_provider}_api_key', "")
vision_model = st.session_state.get(f'vision_{vision_llm_provider}_model_name', "")
vision_base_url = st.session_state.get(f'vision_{vision_llm_provider}_base_url', "")
narrato_api_key = config.app.get('narrato_api_key')
update_progress(20, "开始准备生成脚本")
@ -50,9 +51,11 @@ def generate_script_short(tr, params, custom_clips=5):
st.stop()
api_params = {
"vision_provider": vision_llm_provider,
"vision_api_key": vision_api_key,
"vision_model_name": vision_model,
"vision_base_url": vision_base_url or "",
"text_provider": text_provider,
"text_api_key": text_api_key,
"text_model_name": text_model,
"text_base_url": text_base_url or ""
@ -65,8 +68,6 @@ def generate_script_short(tr, params, custom_clips=5):
api_key=text_api_key,
model_name=text_model,
base_url=text_base_url,
narrato_api_key=narrato_api_key,
bert_path="app/models/bert/",
custom_clips=custom_clips,
)

View File

@ -0,0 +1,127 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
'''
@Project: NarratoAI
@File : 短剧解说脚本生成
@Author : 小林同学
@Date : 2025/5/10 下午10:26
'''
import os
import json
import time
import traceback
import streamlit as st
from loguru import logger
from app.config import config
from app.services.SDE.short_drama_explanation import analyze_subtitle, generate_narration_script
def generate_script_short_sunmmary(params, subtitle_path, video_theme, temperature):
"""
生成 短剧解说 视频脚本
要求: 提供高质量短剧字幕
适合场景: 短剧
"""
progress_bar = st.progress(0)
status_text = st.empty()
def update_progress(progress: float, message: str = ""):
progress_bar.progress(progress)
if message:
status_text.text(f"{progress}% - {message}")
else:
status_text.text(f"进度: {progress}%")
try:
with st.spinner("正在生成脚本..."):
if not params.video_origin_path:
st.error("请先选择视频文件")
return
"""
1. 获取字幕
"""
update_progress(30, "正在解析字幕...")
# 判断字幕文件是否存在
if not os.path.exists(subtitle_path):
st.error("字幕文件不存在")
return
"""
2. 分析字幕总结剧情
"""
text_provider = config.app.get('text_llm_provider', 'gemini').lower()
text_api_key = config.app.get(f'text_{text_provider}_api_key')
text_model = config.app.get(f'text_{text_provider}_model_name')
text_base_url = config.app.get(f'text_{text_provider}_base_url')
analysis_result = analyze_subtitle(
subtitle_file_path=subtitle_path,
api_key=text_api_key,
model=text_model,
base_url=text_base_url,
save_result=True,
temperature=temperature
)
"""
3. 根据剧情生成解说文案
"""
if analysis_result["status"] == "success":
logger.info("字幕分析成功!")
update_progress(60, "正在生成文案...")
# 根据剧情生成解说文案
narration_result = generate_narration_script(
short_name=video_theme,
plot_analysis=analysis_result["analysis"],
api_key=text_api_key,
model=text_model,
base_url=text_base_url,
save_result=True,
temperature=temperature
)
if narration_result["status"] == "success":
logger.info("\n解说文案生成成功!")
logger.info(narration_result["narration_script"])
else:
logger.info(f"\n解说文案生成失败: {narration_result['message']}")
st.error("生成脚本失败,请检查日志")
st.stop()
else:
logger.error(f"分析失败: {analysis_result['message']}")
st.error("生成脚本失败,请检查日志")
st.stop()
"""
4. 生成文案
"""
logger.info("开始准备生成解说文案")
# 结果转换为JSON字符串
narration_script = narration_result["narration_script"]
narration_dict = json.loads(narration_script)
script = json.dumps(narration_dict['items'], ensure_ascii=False, indent=2)
if script is None:
st.error("生成脚本失败,请检查日志")
st.stop()
logger.success(f"剪辑脚本生成完成")
if isinstance(script, list):
st.session_state['video_clip_json'] = script
elif isinstance(script, str):
st.session_state['video_clip_json'] = json.loads(script)
update_progress(90, "整理输出...")
time.sleep(0.1)
progress_bar.progress(100)
status_text.text("脚本生成完成!")
st.success("视频脚本生成成功!")
except Exception as err:
st.error(f"生成过程中发生错误: {str(err)}")
logger.exception(f"生成脚本时发生错误\n{traceback.format_exc()}")
finally:
time.sleep(2)
progress_bar.empty()
status_text.empty()

View File

@ -1,8 +0,0 @@
from .performance import monitor_performance, PerformanceMonitor
from .cache import *
from .file_utils import *
__all__ = [
'monitor_performance',
'PerformanceMonitor'
]

View File

@ -1,9 +1,9 @@
"""
合并视频和字幕文件
"""
from moviepy.editor import VideoFileClip, concatenate_videoclips
import pysrt
import os
import pysrt
from moviepy import VideoFileClip, concatenate_videoclips
def get_video_duration(video_path):

View File

@ -1,37 +0,0 @@
import psutil
import os
from loguru import logger
import torch
class PerformanceMonitor:
@staticmethod
def monitor_memory():
process = psutil.Process(os.getpid())
memory_info = process.memory_info()
logger.debug(f"Memory usage: {memory_info.rss / 1024 / 1024:.2f} MB")
if torch.cuda.is_available():
gpu_memory = torch.cuda.memory_allocated() / 1024 / 1024
logger.debug(f"GPU Memory usage: {gpu_memory:.2f} MB")
@staticmethod
def cleanup_resources():
if torch.cuda.is_available():
torch.cuda.empty_cache()
import gc
gc.collect()
PerformanceMonitor.monitor_memory()
def monitor_performance(func):
"""性能监控装饰器"""
def wrapper(*args, **kwargs):
try:
PerformanceMonitor.monitor_memory()
result = func(*args, **kwargs)
return result
finally:
PerformanceMonitor.cleanup_resources()
return wrapper