mirror of https://github.com/linyqh/NarratoAI.git
synced 2025-12-13 12:12:50 +00:00
Clip-editing logic is about 80% done.
Still to optimize: 1. improve script/narration quality; 2. improve the WebUI experience.
This commit is contained in:
parent 02589c8355
commit dc4ce80ea5
```diff
@@ -353,7 +353,7 @@ class VideoClipParams(BaseModel):
     bgm_file: Optional[str] = Field(default="", description="背景音乐文件")
     bgm_volume: Optional[float] = Field(default=0.2, description="背景音乐音量")
 
-    subtitle_enabled: Optional[bool] = Field(default=False, description="是否启用字幕")
+    subtitle_enabled: Optional[bool] = Field(default=True, description="是否启用字幕")
     subtitle_position: Optional[str] = Field(default="bottom", description="字幕位置")  # top, bottom, center
     font_name: Optional[str] = Field(default="STHeitiMedium.ttc", description="字体名称")
     text_fore_color: Optional[str] = Field(default="#FFFFFF", description="文字前景色")
```
```diff
@@ -365,4 +365,3 @@ class VideoClipParams(BaseModel):
     custom_position: float = Field(default=70.0, description="自定义位置")
 
     n_threads: Optional[int] = 8  # 线程数,有助于提升视频处理速度
-    # paragraph_number: Optional[int] = 1  # 段落数量
```
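For reference, the two hunks above only change `Field` defaults on the Pydantic model, so the new behavior shows up whenever a `VideoClipParams` is built without explicit values. A minimal sketch, trimmed to the fields visible in the diff (not the full model):

```python
# Minimal sketch of the changed defaults; only fields shown in the diff above.
from typing import Optional

from pydantic import BaseModel, Field


class VideoClipParams(BaseModel):
    bgm_file: Optional[str] = Field(default="", description="背景音乐文件")
    bgm_volume: Optional[float] = Field(default=0.2, description="背景音乐音量")
    subtitle_enabled: Optional[bool] = Field(default=True, description="是否启用字幕")
    subtitle_position: Optional[str] = Field(default="bottom", description="字幕位置")
    n_threads: Optional[int] = 8  # 线程数,有助于提升视频处理速度


params = VideoClipParams()
print(params.subtitle_enabled)  # True -- subtitles are now on by default
print(params.n_threads)         # 8
```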
```diff
@@ -1,6 +1,7 @@
 import json
 import os.path
 import re
+import traceback
 from typing import Optional
 
 from faster_whisper import WhisperModel
```
```diff
@@ -11,35 +12,53 @@ import google.generativeai as genai
 from app.config import config
 from app.utils import utils
 
-model_size = config.whisper.get("model_size", "large-v3")
+model_size = config.whisper.get("model_size", "faster-whisper-large-v2")
 device = config.whisper.get("device", "cpu")
 compute_type = config.whisper.get("compute_type", "int8")
 model = None
 
 
 def create(audio_file, subtitle_file: str = ""):
+    """
+    为给定的音频文件创建字幕文件。
+
+    参数:
+    - audio_file: 音频文件的路径。
+    - subtitle_file: 字幕文件的输出路径(可选)。如果未提供,将根据音频文件的路径生成字幕文件。
+
+    返回:
+    无返回值,但会在指定路径生成字幕文件。
+    """
     global model
     if not model:
-        model_path = f"{utils.root_dir()}/models/whisper-{model_size}"
+        model_path = f"{utils.root_dir()}/app/models/faster-whisper-large-v2"
         model_bin_file = f"{model_path}/model.bin"
         if not os.path.isdir(model_path) or not os.path.isfile(model_bin_file):
-            model_path = model_size
+            logger.error(
+                "请先下载 whisper 模型\n\n"
+                "********************************************\n"
+                "下载地址:https://huggingface.co/guillaumekln/faster-whisper-large-v2\n"
+                "存放路径:app/models \n"
+                "********************************************\n"
+            )
+            return None
 
         logger.info(
-            f"loading model: {model_path}, device: {device}, compute_type: {compute_type}"
+            f"加载模型: {model_path}, 设备: {device}, 计算类型: {compute_type}"
         )
         try:
             model = WhisperModel(
-                model_size_or_path=model_path, device=device, compute_type=compute_type
+                model_size_or_path=model_path, device=device, compute_type=compute_type, local_files_only=True
             )
         except Exception as e:
             logger.error(
-                f"failed to load model: {e} \n\n"
+                f"加载模型失败: {e} \n\n"
                 f"********************************************\n"
-                f"this may be caused by network issue. \n"
-                f"please download the model manually and put it in the 'models' folder. \n"
-                f"see [README.md FAQ](https://github.com/harry0703/NarratoAI) for more details.\n"
+                f"这可能是由网络问题引起的. \n"
+                f"请手动下载模型并将其放入 'app/models' 文件夹中。 \n"
+                f"see [README.md FAQ](https://github.com/linyqh/NarratoAI) for more details.\n"
                 f"********************************************\n\n"
+                f"{traceback.format_exc()}"
             )
             return None
 
```
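The rewritten loader no longer falls back to a model name (and an implicit download): it expects `app/models/faster-whisper-large-v2/model.bin` on disk and passes `local_files_only=True` to faster-whisper. A standalone sketch of the same call pattern; the audio path and `beam_size` below are illustrative assumptions, not taken from the commit:

```python
# Sketch: load the local faster-whisper model and transcribe one file.
# "audio.wav" and beam_size=5 are illustrative assumptions.
import os

from faster_whisper import WhisperModel

model_path = "app/models/faster-whisper-large-v2"  # must already contain model.bin
if not os.path.isfile(os.path.join(model_path, "model.bin")):
    raise FileNotFoundError(
        "download https://huggingface.co/guillaumekln/faster-whisper-large-v2 into app/models first"
    )

model = WhisperModel(
    model_size_or_path=model_path,
    device="cpu",
    compute_type="int8",
    local_files_only=True,  # never reach out to the Hugging Face hub
)

segments, info = model.transcribe("audio.wav", beam_size=5)
print(f"detected language: '{info.language}', probability: {info.language_probability:.2f}")
for seg in segments:
    print(f"[{seg.start:.2f} -> {seg.end:.2f}] {seg.text}")
```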
```diff
@@ -56,7 +75,7 @@ def create(audio_file, subtitle_file: str = ""):
     )
 
     logger.info(
-        f"detected language: '{info.language}', probability: {info.language_probability:.2f}"
+        f"检测到的语言: '{info.language}', probability: {info.language_probability:.2f}"
     )
 
     start = timer()
```
```diff
@@ -139,6 +158,15 @@ def create(audio_file, subtitle_file: str = ""):
 
 
 def file_to_subtitles(filename):
+    """
+    将字幕文件转换为字幕列表。
+
+    参数:
+    filename (str): 字幕文件的路径。
+
+    返回:
+    list: 包含字幕序号、出现时间、和字幕文本的元组列表。
+    """
     if not filename or not os.path.isfile(filename):
         return []
 
```
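The new docstring documents `file_to_subtitles()` as returning a list of (index, time range, text) tuples; the parser body itself is unchanged and not shown in this diff. A hypothetical minimal SRT parser with that return shape, for reference only:

```python
# Hypothetical sketch of an SRT parser producing the tuple shape described in
# the docstring above; this is NOT the project's actual implementation.
import re


def srt_to_tuples(filename: str) -> list:
    with open(filename, "r", encoding="utf-8") as f:
        blocks = re.split(r"\n\s*\n", f.read().strip())

    result = []
    for block in blocks:
        lines = block.strip().splitlines()
        if len(lines) < 3:
            continue
        index = int(lines[0].strip())        # 字幕序号
        time_range = lines[1].strip()        # 出现时间, e.g. "00:00:01,000 --> 00:00:03,500"
        text = "\n".join(lines[2:]).strip()  # 字幕文本
        result.append((index, time_range, text))
    return result
```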
```diff
@@ -313,28 +341,28 @@ def create_with_gemini(audio_file: str, subtitle_file: str = "", api_key: Option
 
 
 if __name__ == "__main__":
-    task_id = "task456"
+    task_id = "test456"
     task_dir = utils.task_dir(task_id)
     subtitle_file = f"{task_dir}/subtitle.srt"
-    audio_file = f"{task_dir}/audio.mp3"
+    audio_file = f"{task_dir}/audio.wav"
 
     subtitles = file_to_subtitles(subtitle_file)
     print(subtitles)
 
-    script_file = f"{task_dir}/script.json"
-    with open(script_file, "r") as f:
-        script_content = f.read()
-    s = json.loads(script_content)
-    script = s.get("script")
+    # script_file = f"{task_dir}/script.json"
+    # with open(script_file, "r") as f:
+    #     script_content = f.read()
+    # s = json.loads(script_content)
+    # script = s.get("script")
+    #
+    # correct(subtitle_file, script)
 
-    correct(subtitle_file, script)
+    subtitle_file = f"{task_dir}/subtitle111.srt"
 
-    subtitle_file = f"{task_dir}/subtitle-test.srt"
     create(audio_file, subtitle_file)
 
-    # 使用Gemini模型处理音频
-    gemini_api_key = config.app.get("gemini_api_key")  # 请替换为实际的API密钥
-    gemini_subtitle_file = create_with_gemini(audio_file, api_key=gemini_api_key)
-    if gemini_subtitle_file:
-        print(f"Gemini生成的字幕文件: {gemini_subtitle_file}")
+    # # 使用Gemini模型处理音频
+    # gemini_api_key = config.app.get("gemini_api_key")  # 请替换为实际的API密钥
+    # gemini_subtitle_file = create_with_gemini(audio_file, api_key=gemini_api_key)
+    # #
+    # if gemini_subtitle_file:
+    #     print(f"Gemini生成的字幕文件: {gemini_subtitle_file}")
```
```diff
@@ -383,27 +383,11 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos):
 
     subtitle_path = ""
     if params.subtitle_enabled:
-        subtitle_path = path.join(utils.task_dir(task_id), f"subtitle111.srt")
+        subtitle_path = path.join(utils.task_dir(task_id), f"subtitle.srt")
         subtitle_provider = config.app.get("subtitle_provider", "").strip().lower()
         logger.info(f"\n\n## 3. 生成字幕、提供程序是: {subtitle_provider}")
-        subtitle_fallback = False
-        if subtitle_provider == "edge":
-            voice.create_subtitle(text=video_script, sub_maker="sub_maker", subtitle_file=subtitle_path)
-            # voice.create_subtitle(
-            #     text=video_script,
-            #     sub_maker_list=sub_maker_list,
-            #     list_script=list_script,
-            #     subtitle_file=subtitle_path
-            # )
-        # if not os.path.exists(subtitle_path):
-        #     subtitle_fallback = True
-        #     logger.warning("找不到字幕文件,回退到whisper")
-        #
-        # if subtitle_provider == "whisper" or subtitle_fallback:
-        #     # subtitle.create(audio_file=audio_file, subtitle_file=subtitle_path)
-        #     subtitle.create_with_gemini(audio_file=audio_file, subtitle_file=subtitle_path, api_key=config.app.get("gemini_api_key", ""))
-        #     logger.info("\n\n## 更正字幕")
-        #     subtitle.correct(subtitle_file=subtitle_path, video_script=video_script)
+        # 使用 faster-whisper-large-v2 模型生成字幕
+        subtitle.create(audio_file=audio_file, subtitle_file=subtitle_path)
 
         subtitle_lines = subtitle.file_to_subtitles(subtitle_path)
         if not subtitle_lines:
```
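With the edge/whisper branching and the commented-out fallback removed, the subtitle step now runs unconditionally whenever `params.subtitle_enabled` is set. A condensed sketch of the resulting flow; the helper name, import paths, and the empty-result handling are assumptions, only the `subtitle.create` / `file_to_subtitles` calls follow the hunk:

```python
# Condensed sketch of the new subtitle step (hypothetical helper, assumed imports).
from os import path

from loguru import logger

from app.config import config
from app.services import subtitle
from app.utils import utils


def generate_subtitle(task_id: str, audio_file: str) -> str:
    subtitle_path = path.join(utils.task_dir(task_id), "subtitle.srt")
    provider = config.app.get("subtitle_provider", "").strip().lower()
    logger.info(f"generating subtitles, provider: {provider}")

    # subtitles are produced locally by the faster-whisper-large-v2 model
    subtitle.create(audio_file=audio_file, subtitle_file=subtitle_path)

    if not subtitle.file_to_subtitles(subtitle_path):
        logger.warning("subtitle file is empty, continuing without subtitles")  # assumed handling
        return ""
    return subtitle_path
```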
```diff
@@ -73,9 +73,10 @@
 deepseek_base_url = "https://api.deepseek.com"
 deepseek_model_name = "deepseek-chat"
 
-# Subtitle Provider, "edge" or "whisper"
+# Subtitle Provider, "whisper"
 # If empty, the subtitle will not be generated
-subtitle_provider = "edge"
+subtitle_provider = "faster-whisper-large-v2"
+subtitle_enabled = true
 
 #
 # ImageMagick
```
```diff
@@ -159,7 +160,7 @@
 # model = WhisperModel(model_size, device="cpu", compute_type="int8")
 
 # recommended model_size: "large-v3"
-model_size="large-v3"
+model_size="faster-whisper-large-v2"
 # if you want to use GPU, set device="cuda"
 device="CPU"
 compute_type="int8"
```
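These config.example.toml keys are the ones read back in the subtitle service through `config.whisper.get` / `config.app.get` (see the earlier hunk). A small sketch of that read path with the new defaults; whether `subtitle_enabled` sits under the `[app]` section is an assumption:

```python
# Sketch: how the toml keys above are consumed in Python code.
from app.config import config

model_size = config.whisper.get("model_size", "faster-whisper-large-v2")
device = config.whisper.get("device", "cpu")          # set "cuda" in the toml to use the GPU
compute_type = config.whisper.get("compute_type", "int8")

subtitle_provider = config.app.get("subtitle_provider", "").strip().lower()
subtitle_enabled = config.app.get("subtitle_enabled", True)  # assumption: key lives in [app]

print(model_size, device, compute_type, subtitle_provider, subtitle_enabled)
```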