mirror of
https://github.com/linyqh/NarratoAI.git
synced 2025-12-14 21:12:49 +00:00
优化tts
This commit is contained in:
parent
2ff72e8b06
commit
6c341388f7
@ -1,18 +1,24 @@
|
|||||||
from fastapi import Request
|
from fastapi import Request, File, UploadFile
|
||||||
|
import os
|
||||||
from app.controllers.v1.base import new_router
|
from app.controllers.v1.base import new_router
|
||||||
from app.models.schema import (
|
from app.models.schema import (
|
||||||
VideoScriptResponse,
|
VideoScriptResponse,
|
||||||
VideoScriptRequest,
|
VideoScriptRequest,
|
||||||
VideoTermsResponse,
|
VideoTermsResponse,
|
||||||
VideoTermsRequest,
|
VideoTermsRequest,
|
||||||
|
VideoTranscriptionRequest,
|
||||||
|
VideoTranscriptionResponse,
|
||||||
)
|
)
|
||||||
from app.services import llm
|
from app.services import llm
|
||||||
from app.utils import utils
|
from app.utils import utils
|
||||||
|
from app.config import config
|
||||||
|
|
||||||
# 认证依赖项
|
# 认证依赖项
|
||||||
# router = new_router(dependencies=[Depends(base.verify_token)])
|
# router = new_router(dependencies=[Depends(base.verify_token)])
|
||||||
router = new_router()
|
router = new_router()
|
||||||
|
|
||||||
|
# 定义上传目录
|
||||||
|
UPLOAD_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "uploads")
|
||||||
|
|
||||||
@router.post(
|
@router.post(
|
||||||
"/scripts",
|
"/scripts",
|
||||||
@ -42,3 +48,46 @@ def generate_video_terms(request: Request, body: VideoTermsRequest):
|
|||||||
)
|
)
|
||||||
response = {"video_terms": video_terms}
|
response = {"video_terms": video_terms}
|
||||||
return utils.get_response(200, response)
|
return utils.get_response(200, response)
|
||||||
|
|
||||||
|
|
||||||
|
@router.post(
    "/transcription",
    response_model=VideoTranscriptionResponse,
    summary="Transcribe video content using Gemini"
)
async def transcribe_video(
    request: Request,
    video_name: str,
    language: str = "zh-CN",
    video_file: UploadFile = File(...)
):
    """
    Transcribe an uploaded video with Gemini (timestamps, scene descriptions
    and speech content).

    The upload is stored under UPLOAD_DIR only for the duration of the call
    and is removed afterwards, whether or not transcription succeeds.

    Args:
        request: Incoming FastAPI request (not read by this handler).
        video_name: Name of the video.
        language: Language code, defaults to "zh-CN".
        video_file: The uploaded video file.

    Returns:
        The value of ``utils.get_response(200, {"transcription": ...})``.
    """
    # Local import keeps this handler self-contained; uuid is standard library.
    import uuid

    # Make sure the temporary upload directory exists.
    os.makedirs(UPLOAD_DIR, exist_ok=True)

    # The file name is chosen by the client. Keep only its last path
    # component so a name like "../../app/config.toml" cannot escape
    # UPLOAD_DIR, and prefix a random token so concurrent uploads that share
    # a name cannot overwrite (or delete) each other's file. The original
    # name stays as the suffix, so the file extension is preserved.
    client_name = os.path.basename((video_file.filename or "").replace("\\", "/"))
    stored_name = f"{uuid.uuid4().hex}_{client_name or 'upload'}"
    video_path = os.path.join(UPLOAD_DIR, stored_name)

    try:
        # Copy the upload in fixed-size chunks instead of loading the whole
        # video into memory. Writing happens inside the try block so a
        # partially written file is still cleaned up on failure.
        with open(video_path, "wb") as buffer:
            while True:
                chunk = await video_file.read(1024 * 1024)
                if not chunk:
                    break
                buffer.write(chunk)

        transcription = llm.gemini_video_transcription(
            video_name=video_name,
            video_path=video_path,
            language=language,
            llm_provider_video=config.app.get("video_llm_provider", "gemini")
        )
        response = {"transcription": transcription}
        return utils.get_response(200, response)
    finally:
        # Remove the temporary file once processing is finished.
        if os.path.exists(video_path):
            os.remove(video_path)
|
||||||
|
|||||||
@ -365,3 +365,13 @@ class VideoClipParams(BaseModel):
|
|||||||
custom_position: float = Field(default=70.0, description="自定义位置")
|
custom_position: float = Field(default=70.0, description="自定义位置")
|
||||||
|
|
||||||
n_threads: Optional[int] = 8 # 线程数,有助于提升视频处理速度
|
n_threads: Optional[int] = 8 # 线程数,有助于提升视频处理速度
|
||||||
|
|
||||||
|
# Request model for a video transcription call.
class VideoTranscriptionRequest(BaseModel):
    # Name of the video to transcribe (required).
    video_name: str
    # Language code; "zh-CN" when the caller does not supply one.
    language: str = "zh-CN"

    class Config:
        # Let pydantic accept field types it has no built-in validator for.
        arbitrary_types_allowed = True
|
||||||
|
|
||||||
|
# Response model for a video transcription call.
class VideoTranscriptionResponse(BaseModel):
    # The transcription result, as a single string.
    transcription: str
|
||||||
|
|||||||
@ -73,25 +73,40 @@ def merge_audio_files(task_id: str, audio_file_paths: List[str], total_duration:
|
|||||||
|
|
||||||
def parse_timestamp(timestamp: str):
    """Parse a timestamp string into a number of seconds.

    Underscores are treated as colons, so "00_06" and "00:06" are handled
    the same way. The conversion itself is delegated to ``time_to_seconds``.

    Args:
        timestamp: Timestamp text using ':' or '_' as the field separator.

    Returns:
        Whatever ``time_to_seconds`` returns for the normalized text.
    """
    # Normalize to the colon-separated form before converting.
    return time_to_seconds(timestamp.replace('_', ':'))
|
||||||
|
|
||||||
def extract_timestamp(filename):
    """Extract the start and end times, in seconds, from an audio file name.

    The expected name looks like "audio_00_06-00_24.mp3": everything after
    the first underscore and before the first dot is "<start>-<end>", with
    underscores standing in for the colons of each timestamp.

    Args:
        filename: File name, or a path ending in such a file name. Only the
            last path component is parsed, so underscores, dots or hyphens
            in directory names cannot disturb the result.

    Returns:
        Tuple ``(start_seconds, end_seconds)`` as produced by
        ``time_to_seconds``.

    Raises:
        IndexError: If the base name contains no underscore.
        ValueError: If the time part does not contain exactly one hyphen.
    """
    # Local import keeps this function self-contained.
    import os

    # Parse only the base name; directory parts are not part of the format.
    base_name = os.path.basename(filename)

    # "audio_00_06-00_24.mp3" -> "00_06-00_24"
    time_part = base_name.split('_', 1)[1].split('.')[0]
    # "00_06-00_24" -> "00_06", "00_24"
    start_time, end_time = time_part.split('-')

    # Convert the underscore form back to the colon form.
    start_time = start_time.replace('_', ':')
    end_time = end_time.replace('_', ':')

    # Convert both timestamps to seconds.
    start_seconds = time_to_seconds(start_time)
    end_seconds = time_to_seconds(end_time)

    return start_seconds, end_seconds
|
||||||
|
|
||||||
|
|
||||||
def time_to_seconds(time_str):
    """Convert a clock-style timestamp into a total number of seconds.

    Accepts "MM:SS" and "HH:MM:SS". Underscores may be used instead of
    colons ("00_06"), which is the form used inside file names.

    Args:
        time_str: Timestamp text such as "00:06", "00_06" or "01:02:03".

    Returns:
        Total seconds as an int. If the text does not consist of two or
        three integer fields, an error is logged and 0 is returned.
    """
    # Normalize the separator so both spellings parse the same way.
    time_str = time_str.replace('_', ':')
    parts = time_str.split(':')

    if len(parts) not in (2, 3):
        logger.error(f"Invalid time format: {time_str}")
        return 0

    try:
        fields = [int(part) for part in parts]
    except ValueError as e:
        logger.error(f"Error parsing time {time_str}: {str(e)}")
        return 0

    # Each field is worth 60 times the one to its right:
    # MM:SS -> MM*60 + SS, HH:MM:SS -> (HH*60 + MM)*60 + SS.
    total = 0
    for value in fields:
        total = total * 60 + value
    return total
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@ -332,7 +332,7 @@ def generate_video_v2(
|
|||||||
logger.info(f" ③ 字幕: {subtitle_path}")
|
logger.info(f" ③ 字幕: {subtitle_path}")
|
||||||
logger.info(f" ④ 输出: {output_file}")
|
logger.info(f" ④ 输出: {output_file}")
|
||||||
|
|
||||||
# 写入与输出文件相同的目录
|
# 写入与输出文件相同的目录
|
||||||
output_dir = os.path.dirname(output_file)
|
output_dir = os.path.dirname(output_file)
|
||||||
|
|
||||||
# 字体设置部分保持不变
|
# 字体设置部分保持不变
|
||||||
@ -389,6 +389,36 @@ def generate_video_v2(
|
|||||||
# 处理新的音频文件
|
# 处理新的音频文件
|
||||||
new_audio = AudioFileClip(audio_path).volumex(params.voice_volume)
|
new_audio = AudioFileClip(audio_path).volumex(params.voice_volume)
|
||||||
|
|
||||||
|
# 合并音频轨道
|
||||||
|
audio_tracks = []
|
||||||
|
|
||||||
|
# 检查原始视频音轨
|
||||||
|
if original_audio is not None:
|
||||||
|
audio_tracks.append(original_audio)
|
||||||
|
|
||||||
|
# 添加新的音频
|
||||||
|
audio_tracks.append(new_audio)
|
||||||
|
|
||||||
|
# 背景音乐处理部分
|
||||||
|
bgm_file = get_bgm_file(bgm_type=params.bgm_type, bgm_file=params.bgm_file)
|
||||||
|
if bgm_file:
|
||||||
|
try:
|
||||||
|
bgm_clip = (
|
||||||
|
AudioFileClip(bgm_file).volumex(params.bgm_volume).audio_fadeout(3)
|
||||||
|
)
|
||||||
|
bgm_clip = afx.audio_loop(bgm_clip, duration=video_duration)
|
||||||
|
audio_tracks.append(bgm_clip)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"添加背景音乐失败: {str(e)}")
|
||||||
|
|
||||||
|
# 确保至少有一个有效的音轨
|
||||||
|
if not audio_tracks:
|
||||||
|
logger.warning("没有有效的音轨可用")
|
||||||
|
final_audio = new_audio
|
||||||
|
else:
|
||||||
|
# 合并所有音频轨道
|
||||||
|
final_audio = CompositeAudioClip(audio_tracks)
|
||||||
|
|
||||||
# 字幕处理部分
|
# 字幕处理部分
|
||||||
if subtitle_path and os.path.exists(subtitle_path):
|
if subtitle_path and os.path.exists(subtitle_path):
|
||||||
sub = SubtitlesClip(subtitles=subtitle_path, encoding="utf-8")
|
sub = SubtitlesClip(subtitles=subtitle_path, encoding="utf-8")
|
||||||
@ -417,25 +447,6 @@ def generate_video_v2(
|
|||||||
# 创建一个新的视频剪辑,包含所有字幕
|
# 创建一个新的视频剪辑,包含所有字幕
|
||||||
video_clip = CompositeVideoClip([video_clip, *text_clips])
|
video_clip = CompositeVideoClip([video_clip, *text_clips])
|
||||||
|
|
||||||
# 背景音乐处理部分
|
|
||||||
bgm_file = get_bgm_file(bgm_type=params.bgm_type, bgm_file=params.bgm_file)
|
|
||||||
|
|
||||||
# 合并音频轨道
|
|
||||||
audio_tracks = [original_audio, new_audio]
|
|
||||||
|
|
||||||
if bgm_file:
|
|
||||||
try:
|
|
||||||
bgm_clip = (
|
|
||||||
AudioFileClip(bgm_file).volumex(params.bgm_volume).audio_fadeout(3)
|
|
||||||
)
|
|
||||||
bgm_clip = afx.audio_loop(bgm_clip, duration=video_duration)
|
|
||||||
audio_tracks.append(bgm_clip)
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"添加背景音乐失败: {str(e)}")
|
|
||||||
|
|
||||||
# 合并所有音频轨道
|
|
||||||
final_audio = CompositeAudioClip(audio_tracks)
|
|
||||||
|
|
||||||
video_clip = video_clip.set_audio(final_audio)
|
video_clip = video_clip.set_audio(final_audio)
|
||||||
video_clip.write_videofile(
|
video_clip.write_videofile(
|
||||||
output_file,
|
output_file,
|
||||||
|
|||||||
@ -1060,7 +1060,7 @@ def azure_tts_v1(
|
|||||||
logger.info(f"start, voice name: {voice_name}, try: {i + 1}")
|
logger.info(f"start, voice name: {voice_name}, try: {i + 1}")
|
||||||
|
|
||||||
async def _do() -> SubMaker:
|
async def _do() -> SubMaker:
|
||||||
communicate = edge_tts.Communicate(text, voice_name, rate=rate_str)
|
communicate = edge_tts.Communicate(text, voice_name, rate=rate_str, proxy="http://127.0.0.1:7890")
|
||||||
sub_maker = edge_tts.SubMaker()
|
sub_maker = edge_tts.SubMaker()
|
||||||
with open(voice_file, "wb") as file:
|
with open(voice_file, "wb") as file:
|
||||||
async for chunk in communicate.stream():
|
async for chunk in communicate.stream():
|
||||||
@ -1410,12 +1410,12 @@ def tts_multiple(task_id: str, list_script: list, voice_name: str, voice_rate: f
|
|||||||
|
|
||||||
for item in list_script:
|
for item in list_script:
|
||||||
if not item['OST']:
|
if not item['OST']:
|
||||||
# timestamp = item['new_timestamp'].replace(':', '@')
|
# 将时间戳中的冒号替换为下划线
|
||||||
timestamp = item['new_timestamp']
|
timestamp = item['new_timestamp'].replace(':', '_')
|
||||||
audio_file = os.path.join(output_dir, f"audio_{timestamp}.mp3")
|
audio_file = os.path.join(output_dir, f"audio_{timestamp}.mp3")
|
||||||
|
|
||||||
# 检查文件是否已存在,如存在且不强制重新生成,则跳过
|
# 检查文件是否已存在,如存在且不强制重新生成,则跳过
|
||||||
if os.path.exists(audio_file):
|
if os.path.exists(audio_file) and not force_regenerate:
|
||||||
logger.info(f"音频文件已存在,跳过生成: {audio_file}")
|
logger.info(f"音频文件已存在,跳过生成: {audio_file}")
|
||||||
audio_files.append(audio_file)
|
audio_files.append(audio_file)
|
||||||
continue
|
continue
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user